import argparse

from sly import Lexer, Parser
def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Finds matching control token in string and returns the offset.
    The string's first character must match openchar. Otherwise, 0 is returned.

    Args:
        openchar (str): opening char, e.g. '{'
        closechar (str): closing char, e.g. '}'

    Returns:
        int: position of matching closing char in string.
    """
    if string[0] == openchar:
        end = 1
        count = 1
        while end < len(string) and count > 0:
            if string[end] == openchar:
                count += 1
            elif string[end] == closechar:
                count -= 1
            end += 1
        if count != 0:
            raise SyntaxError(
                "Matched parenthesis for metavariable could not be found.")
        return end - 1
    return 0
class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures everything within the matched
    parentheses. Call pop_state to return to the default scanning state.
    """
    tokens = {ARGUMENTTUPLE, }  # noqa: F821
    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):
        """
        Capture metavariable parameters by finding the matched parenthesis.
        Args:
            t (sly.lex.token): token of type LPAREN
        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        t.value = self.text[self.index-1: self.index+pos]
        # increment current scanning position past the captured tuple
        self.index = self.index + pos
        self.pop_state()
        return t
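    # Illustration (hypothetical input): for the cut "daughter(0, p) > 1" the
    # default lexer pushes B2ParameterLexer after the identifier's '(' and the
    # whole argument list comes back as one token:
    #   [tok.type for tok in B2Lexer().tokenize("daughter(0, p) > 1")]
    #   # -> ['IDENTIFIER', 'ARGUMENTTUPLE', 'GREATER', 'INTEGER']
    #   # ARGUMENTTUPLE value: "(0, p)"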
class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """
    def __init__(self):
        """Initialize Lexer"""
        # control_token_stack (list): stack for keeping track of seen
        # brackets and parentheses
        self.control_token_stack = list()
    cut_tokens = {
        LBRACK, RBRACK, AND, OR, NOT,  # noqa: F821
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        POWER, TIMES, DIVIDE, PLUS, MINUS,  # noqa: F821
    }
    tokens = expression_tokens.union(cut_tokens)
    @_(r"\[")  # noqa: F821
    def LBRACK(self, t):
        """
        Scan opening bracket.
        Args:
            t (sly.lex.token): token of type LBRACK
        Raises:
            SyntaxError: if no following closing bracket is found
        Side Effect:
            Pushes 'BRACK' onto control_token_stack
        """
        if "]" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '[' in cut.")
        self.control_token_stack.append("BRACK")
        return t
    @_(r"\]")  # noqa: F821
    def RBRACK(self, t):
        """
        Scan closing bracket.
        Args:
            t (sly.lex.token): token of type RBRACK
        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no bracket was opened previously.
            2. If state of control_token_stack is 'PAREN', which
                means a closing parenthesis is expected.
        Side Effect:
            Pops object from control_token_stack
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:
            raise SyntaxError("Unmatched ']' in cut.")
        if state == "BRACK":
            return t
        elif state == "PAREN":
            raise SyntaxError("Illegal ']', expected ')'.")
    @_(r"\(")  # noqa: F821
    def LPAREN(self, t):
        """
        Scan opening parenthesis.
        Args:
            t (sly.lex.token): token of type LPAREN
        Raises:
            SyntaxError: if no following closing parenthesis is found
        Side Effect:
            Pushes 'PAREN' onto control_token_stack
        """
        if ")" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '('")
        self.control_token_stack.append("PAREN")
        return t
220 @_(r"\)") # noqa: F821
223 Scan closing parenthesis.
226 t (sly.lex.token): token of type RPAREN
229 SyntaxError: 1. If control_token_stack is empty, which means
230 no parenthesis was opened previously.
231 2. If state of control_token_stack is 'BRACK', which
232 means a closing bracket is expected.
235 Pops state from control_token_stack
243 raise SyntaxError(
"Unmatched ')' in cut.")
245 raise SyntaxError(
"Illegal ')', expected ']'.")
246 elif state ==
"PAREN":
249 @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))
") # noqa: E501, F821
252 Scanning function for double values
255 t (sly.lex.Token): initial token generated by the scanner library.
256 The value attribute is of type str initially, equals
257 the matched sequence and is casted to float.
259 Possible notations covered by this regular expression:
260 Normal decimal notation e.g 0.1
261 Hanging decimal seperator notation e.g 1.
262 Preceding decimal seperator notation e.g .1
263 Scientific notation with (signed) exponents e.g 1.0E4, 1.e-4, .1E+3
264 Exponents are case insensitive e.g 1.e4, 1.E4
265 Integer with exponent e.g 1E4
270 t.value = float(t.value)
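    # Quick illustration (assuming the regex above is used unchanged):
    #   import re
    #   pattern = r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))"
    #   all(re.fullmatch(pattern, s)
    #       for s in ["0.1", "1.", ".1", "1.0E4", "1.e-4", ".1E+3", "1E4"])  # True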
273 @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+
") # noqa: F821
274 def INTEGER(self, t):
276 Scanning function for integer values
277 Allows normal and hex notation (case insensitive)
280 t (sly.lex.Token): initial token generated by the scanner library.
281 The value attribute is of type str initially, equals
282 the matched sequence and is casted to int.
285 python int-objects are converted
286 to the standard c++ int datatype (32bit).
287 Overflows can happen because numerical limits
288 of python int and c++ int datatypes differ.
289 If you need to input large values write it as double.
295 t.value = int(t.value)
298 t.value = int(t.value, base=16)
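    # The two conversion paths above, for illustration:
    #   "42"   -> int("42")            -> 42
    #   "0x2A" -> int("0x2A", base=16) -> 42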
301 @_(r"[a-zA-Z_][a-zA-Z_0-9]*")
302 def IDENTIFIER(self, t):
304 Scaning function for identifiers
306 If a matched sequence equals reserved keywords of other tokens
307 the token type and value is remapped via the reserved dictionary.
310 t (sly.lex.Token): initial token generated by the scanner library.
311 value attribute equals the matched sequence.
325 "infinity":
"DOUBLE",
329 t.type = reserved.get(t.value,
"IDENTIFIER")
332 if t.type ==
"BOOLEAN":
333 t.value = t.value ==
"True" or t.value ==
"true"
335 if t.type ==
"DOUBLE":
336 t.value = float(t.value)
337 if t.type ==
"IDENTIFIER":
339 if self.text[self.index] ==
"(":
341 if ")" not in self.text[self.index:]:
342 raise SyntaxError(
"Unmatched '('")
344 self.push_state(B2ParameterLexer)
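    # For illustration: scanning "p > 1 and true" yields the token types
    # IDENTIFIER, GREATER, INTEGER, AND, BOOLEAN, where the BOOLEAN token
    # carries the Python value True (remapped from the reserved word 'true').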
def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.

    Args:
        parser_type (str): choice of parser type, 'cut' or 'expression'

    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut",
        "expression",
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"
    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:
        <cut> ::= EMPTY
            | <boolean_expression>
        <boolean_expression> ::= <disjunction>
        <disjunction> ::= <conjunction>
            | <disjunction> OR <conjunction>
        <conjunction> ::= <negation>
            | <conjunction> AND <negation>
        <negation> ::= <bracket_expression>
            | NOT <negation>
        <bracket_expression> ::= <relational_expression>
            | LBRACK <boolean_expression> RBRACK
        <relational_expression> ::= <expression>
            | <expression> <comparison_operator> <expression>
            | <expression> <comparison_operator> <expression>
              <comparison_operator> <expression>
        <comparison_operator> ::= EQUALEQUAL
            | GREATER
            | LESS
            | GREATEREQUAL
            | LESSEQUAL
            | NOTEQUAL
        <expression> ::= <sum>
        <sum> ::= <term>
            | <sum> PLUS <term>
            | <sum> MINUS <term>
        <term> ::= <factor>
            | <term> TIMES <factor>
            | <term> DIVIDE <factor>
        <factor> ::= <power>
            | PLUS <factor>
            | MINUS <factor>
        <power> ::= <primary>
            | <primary> POWER <factor>
        <primary> ::= LPAREN <expression> RPAREN
            | <function>
            | IDENTIFIER
            | INTEGER
            | BOOLEAN
            | DOUBLE
        <function> ::= IDENTIFIER ARGUMENTTUPLE
        """

        def __init__(self, verbose=False):
            """
            Initialize Parser.
            @param verbose run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            self.verbose = verbose
            self.parameter_stack = list()
        if parser_type == "cut":
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)

        if parser_type == "cut":
            precedence = (  # noqa: F841
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
            )
        else:
            precedence = (  # noqa: F841
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
            )
474 "UnaryBooleanNode": 0,
475 "BinaryBooleanNode": 1,
476 "UnaryRelationalNode": 2,
477 "BinaryRelationalNode": 3,
478 "TernaryRelationalNode": 4,
479 "UnaryExpressionNode": 5,
480 "BinaryExpressionNode": 6,
508 a_operation_types = {
        def get_node_type(self, node_name: str):
            """
            Return the node type integer value
            or node name if verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value
            or comparison operator name if verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value
            or boolean operator name if verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operator type integer value
            or arithmetic operator token if verbose setting is chosen.
            """
            return operation_name if self.verbose else self.a_operation_types[operation_name]
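        # For illustration: with verbose=True the helpers above return the
        # readable name, otherwise the integer code from the dictionaries, e.g.
        #   get_node_type("BinaryBooleanNode") -> "BinaryBooleanNode" (verbose) or 1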
        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal
                <cut> ::= EMPTY
                    | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (self.get_node_type("BooleanNode"), True))
        @_(r"disjunction")  # noqa: F821
        def boolean_expression(self, p):
            """
            Parsing function for <boolean_expression> nonterminal
            <boolean_expression> ::= <disjunction>
            """
            return p.disjunction
576 @_(r"disjunction OR conjunction", r"conjunction")
577 def disjunction(self, p):
579 Parsing function for <disjunction> nonterminal
582 <disjunction> ::= <conjunction>
583 | <disjunction> OR <conjunction>
587 self.get_node_type(
"BinaryBooleanNode"),
590 self.get_boper_type(p.OR),
592 except AttributeError:
595 @_(r"conjunction AND negation", r"negation")
596 def conjunction(self, p):
598 Parsing function for <conjunction> nonterminal
601 <conjunction> ::= <negation>
602 | <conjunction> AND <negation>
606 self.get_node_type(
"BinaryBooleanNode"),
609 self.get_boper_type(p.AND),
611 except AttributeError:
614 @_(r"bracket_expression", r"NOT negation")
615 def negation(self, p):
617 Parsing function for <negation> nonterminal
620 <negation> ::= <bracket_expression>
624 return p.bracket_expression
625 except AttributeError:
627 self.get_node_type(
"UnaryBooleanNode"),
        @_(  # noqa: F821
            r"relational_expression",
            r"LBRACK boolean_expression RBRACK")
        def bracket_expression(self, p):
            """
            Parsing function for <bracket_expression> nonterminal
            <bracket_expression> ::= <relational_expression>
                | LBRACK <boolean_expression> RBRACK
            """
            try:
                return p.relational_expression
            except AttributeError:
                return (
                    self.get_node_type("UnaryBooleanNode"),
                    p.boolean_expression)
        @_(r"expression")  # noqa: F821
        def relational_expression(self, p):
            """
            Parsing function for <relational_expression> nonterminal
            <relational_expression> ::= <expression>
            """
            return (self.get_node_type("UnaryRelationalNode"), p.expression)
663 @_(r"expression comparison_operator expression")
664 def relational_expression(self, p):
666 Parsing function for <relational_expression> nonterminal
669 <relational_expression> ::= <expression> <comparison_operator>
673 self.get_node_type(
"BinaryRelationalNode"),
676 self.get_coper_type(p.comparison_operator),
679 @_(r"expression comparison_operator expression comparison_operator expression")
680 def relational_expression(self, p):
682 Parsing function for <relational_expression> nonterminal
685 <relational_expression> ::= expression> <comparison_operator>
686 <expression> <comparison_operator> <expression>
689 self.get_node_type(
"TernaryRelationalNode"),
693 self.get_coper_type(p.comparison_operator0),
694 self.get_coper_type(p.comparison_operator1),
        @_(  # noqa: F821
            r"EQUALEQUAL",
            r"GREATER",
            r"LESS",
            r"GREATEREQUAL",
            r"LESSEQUAL",
            r"NOTEQUAL",
        )
        def comparison_operator(self, p):
            """
            Parsing function for <comparison_operator> nonterminal
            <comparison_operator> ::= EQUALEQUAL
                | GREATER
                | LESS
                | GREATEREQUAL
                | LESSEQUAL
                | NOTEQUAL
            """
            return p[0]
        @_(r"sum")  # noqa: F821
        def expression(self, p):
            """
            Parsing function for <expression> nonterminal
            <expression> ::= <sum>
            """
            return p.sum
722 @_(r"sum PLUS term", r"sum MINUS term", r"term")
725 Parsing function for <sum> nonterminal
734 self.get_node_type(
"BinaryExpressionNode"),
737 self.get_a_operation_type(p[1]),
739 except AttributeError:
742 @_(r"term TIMES factor", r"term DIVIDE factor", r"factor")
745 Parsing function for <term> nonterminal
749 | <term> TIMES <factor>
750 | <term> DIVIDE <factor>
754 self.get_node_type(
"BinaryExpressionNode"),
757 self.get_a_operation_type(p[1]),
759 except AttributeError:
            Parsing function for <power> nonterminal
        @_(r"PLUS factor")  # noqa: F821
        def factor(self, p):
            """
            Parsing function for <factor> nonterminal
            <factor> ::= PLUS <factor>
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor)
        @_(r"MINUS factor")  # noqa: F821
        def factor(self, p):
            """
            Parsing function for <factor> nonterminal
            <factor> ::= MINUS <factor>
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor)
        @_(r"primary")  # noqa: F821
        def power(self, p):
            """
            Parsing function for <power> nonterminal
            <power> ::= <primary>
            """
            return p.primary
812 @_(r"primary POWER factor")
815 Parsing function for <power> nonterminal
818 <power> ::= <primary> POWER <factor>
821 self.get_node_type(
"BinaryExpressionNode"),
824 self.get_a_operation_type(p.POWER),
        @_(r"function")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= <function>
            """
            return p.function

        @_(r"LPAREN expression RPAREN")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= LPAREN <expression> RPAREN
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.expression)
        @_(r"INTEGER")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= INTEGER
            """
            return (self.get_node_type("IntegerNode"), p.INTEGER)
        @_(r"DOUBLE")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= DOUBLE
            """
            return (self.get_node_type("DoubleNode"), p.DOUBLE)
        @_(r"BOOLEAN")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= BOOLEAN
            """
            return (self.get_node_type("BooleanNode"), p.BOOLEAN)
        @_(r"IDENTIFIER")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal
            <primary> ::= IDENTIFIER
            """
            if self.parameter_stack:
                return (
                    self.get_node_type("IdentifierNode"),
                    p.IDENTIFIER)
            else:
                return (
                    self.get_node_type("IdentifierNode"),
                    p.IDENTIFIER)
901 @_(r"IDENTIFIER ARGUMENTTUPLE")
902 def function(self, p):
904 Parsing function for <function> nonterminal
907 <function> ::= IDENTIFIER LPAREN <parameters> RPAREN
909 if self.parameter_stack:
911 self.get_node_type(
"FunctionNode"),
913 p.ARGUMENTTUPLE[1:-1],
917 self.get_node_type(
"FunctionNode"),
919 p.ARGUMENTTUPLE[1:-1],
        def error(self, p):
            """
            Error function, called immediately if syntax error is detected.
            @param p (sly.token) offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # get position of the offending token in the cut
                error_pos = p.index
            except AttributeError:
                # syntax error at end of input, point to the end of the cut
                error_pos = len(self.cut) - 1
            try:
                error_token = p.type
            except AttributeError:
                # syntax error at end of input, take the last symbol instead
                error_token = self.symstack[-1].type
            # Create error message
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)
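        # The raised SyntaxError message has the form (placeholders in <...>):
        #   detected at:
        #   <cut string>
        #       ^
        #   Unexpected token '<token type>'
        # with the caret printed under the position of the offending token.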
        def parse(self, cut: str, token_generator) -> tuple:
            """
            Overwrite sly.Parser parse function.
            @param cut unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            # keep the cut string for error reporting
            self.cut = cut
            return super().parse(token_generator)
    return B2ParserMixin


B2Parser = parser_class_decorator(Parser, parser_type="cut")

B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")
def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut.
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2Parser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))
def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut.
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2ExpressionParser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))
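# Usage sketch (module name assumed to be b2parser):
#   from b2parser import parse, parse_expression
#   tree = parse("p > 1 and E < 5.2")           # cut grammar -> nested tuple
#   expr = parse_expression("daughter(0, p)")   # expression grammar -> nested tuple
# Pass verbose=True to get readable node type names instead of integer codes.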
if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    args = argparser.parse_args()
    if args.expression:
        cut = input("Please input expression:\n")
        print(parse_expression(cut))
    else:
        cut = input("Please input cut:\n")
        print(parse(cut))