import argparse

from sly import Lexer, Parser


def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Finds matching control token in string and returns the offset.
    The string's first character must match openchar.
    Otherwise, 0 is returned.

    Args:
        string (str): input
        openchar (str): opening char e.g. '{'
        closechar (str): closing char e.g. '}'

    Returns:
        int: position of matching closing char in string.
    """
    end = 1
    if string[0] == openchar:
        count = 1
        while end < len(string) and count > 0:
            if string[end] == openchar:
                count += 1
            elif string[end] == closechar:
                count -= 1
            end += 1
        if count > 0:
            raise SyntaxError(
                "Matched parenthesis for metavariable could not be found.")
    return end - 1


class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures
    everything within the matched parenthesis.
    Call pop_state to return to the default scanning state.
    """
    ## token set containing only the argument tuple token
    tokens = {ARGUMENTTUPLE, }  # noqa: F821

    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):
        """
        Capture metavariable parameters by finding the matched parenthesis.

        Parameters:
            t (sly.lex.token): token of type LPAREN

        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        # Find the offset of the matching closing parenthesis
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        # Value spans the whole parenthesis, including '(' and ')'
        t.value = self.text[self.index-1: self.index+pos]
        # Increment current scanning position
        self.index = self.index+pos
        # Return to the default scanning state
        self.pop_state()
        return t
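
    # Illustrative sketch (not part of the original module): while scanning
    # "daughter(0, p) > 1", B2Lexer pushes this state when it reaches the '('
    # following the identifier, so "(0, p)" is emitted as a single
    # ARGUMENTTUPLE token instead of being broken up into LPAREN, INTEGER,
    # ',', IDENTIFIER and RPAREN tokens.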


class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """

    def __init__(self):
        """Initialize Lexer"""
        ## control_token_stack (list): stack for keeping track of seen brackets
        ## and parenthesis. Allows finding parenthesis and bracket syntax
        ## errors on scanner level.
        self.control_token_stack = list()

    ## cut specific tokens
    cut_tokens = {
        # structure tokens
        RBRACK, LBRACK,  # noqa: F821
        # boolean operators
        AND, OR, NOT,  # noqa: F821
        # comparison operators
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    ## expression tokens, also needed for cut.
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        # data types
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        # arithmetic operators
        POWER, TIMES, DIVIDE, PLUS, MINUS  # noqa: F821
    }
    ## set of all tokens
    tokens = expression_tokens.union(cut_tokens)

    ## ignore spaces, tabs and newlines
    ignore = " \t\n"

    ## comma token definition as literal
    literals = {r","}

    # Comparison operator token definitions
    ## token regular expression for '=='
    EQUALEQUAL = r"=="
    ## token regular expression for '>='
    GREATEREQUAL = r">="
    ## token regular expression for '<='
    LESSEQUAL = r"<="
    ## token regular expression for '>'
    GREATER = r">"
    ## token regular expression for '<'
    LESS = r"<"
    ## token regular expression for '!='
    NOTEQUAL = r"!="

    # Arithmetic operator token definitions
    ## token regular expression for power, both '**' and '^' allowed
    POWER = r"\*\*|\^"
    ## token regular expression for '*'
    TIMES = r"\*"
    ## token regular expression for '/'
    DIVIDE = r"/"
    ## token regular expression for '+'
    PLUS = r"\+"
    ## token regular expression for '-'
    MINUS = r"-"
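
    # Illustrative sketch (not part of the original module): for the input
    # "p**2 >= 0.5" the scanner emits IDENTIFIER, POWER, INTEGER, GREATEREQUAL
    # and DOUBLE tokens; '**' is matched by the POWER pattern as a whole and
    # is therefore not split into two TIMES tokens.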

    # Scanning Functions for tokens which
    # require additional operations
    # regular expressions are supplied via @_ decorator

    @_(r"\[")  # noqa: F821
    def LBRACK(self, t):
        """
        Scan opening bracket.

        Parameters:
            t (sly.lex.token): token of type LBRACK

        Raises:
            SyntaxError: if no following closing bracket is found

        Side Effect:
            Pushes 'BRACK' onto control_token_stack
        """
        if "]" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '[' in cut.")
        self.control_token_stack.append("BRACK")
        return t

    @_(r"\]")  # noqa: F821
    def RBRACK(self, t):
        """
        Scan closing bracket.

        Parameters:
            t (sly.lex.token): token of type RBRACK

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no bracket was opened previously.
                2. If state of control_token_stack is 'PAREN', which
                means a closing parenthesis is expected.

        Side Effect:
            Pops object from control_token_stack
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ']' in cut.")
        if state == "BRACK":
            return t
        elif state == "PAREN":
            raise SyntaxError("Illegal ']', expected ')'.")
197 @_(r"\(") # noqa: F821
200 Scan opening parenthesis.
203 t (sly.lex.token): token of type LPAREN
206 SyntaxError: if no following closing parenthesis is found
210 Pushes 'PAREN' onto control_token_stack
215 if ")" not in self.text[self.index:]:
216 raise SyntaxError("Unmatched '('")
217 self.control_token_stack.append("PAREN")
220 @_(r"\)") # noqa: F821
223 Scan closing parenthesis.
226 t (sly.lex.token): token of type RPAREN
229 SyntaxError: 1. If control_token_stack is empty, which means
230 no parenthesis was opened previously.
231 2. If state of control_token_stack is 'BRACK', which
232 means a closing bracket is expected.
235 Pops state from control_token_stack
241 state = self.control_token_stack.pop()
242 except IndexError: # pop from empty list
243 raise SyntaxError("Unmatched ')' in cut.")
245 raise SyntaxError("Illegal ')', expected ']'.")
246 elif state == "PAREN":

    @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))")  # noqa: E501, F821
    def DOUBLE(self, t):
        """
        Scanning function for double values

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is cast to float.

        Possible notations covered by this regular expression:
            Normal decimal notation e.g. 0.1
            Hanging decimal separator notation e.g. 1.
            Preceding decimal separator notation e.g. .1
            Scientific notation with (signed) exponents e.g. 1.0E4, 1.e-4, .1E+3
            Exponents are case insensitive e.g. 1.e4, 1.E4
            Integer with exponent e.g. 1E4

        Returns:
            t (sly.lex.Token): token with value cast to float
        """
        t.value = float(t.value)
        return t
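
    # Illustrative sketch (not part of the original module) of inputs the
    # DOUBLE rule accepts, assuming the regular expression above:
    #   "0.1", "1.", ".1", "1.e-4" and "1E4" all become floats,
    #   while a plain "1" is not matched here and is scanned as INTEGER.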

    @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+")  # noqa: F821
    def INTEGER(self, t):
        """
        Scanning function for integer values
        Allows normal and hex notation (case insensitive)

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is cast to int.

        Warning:
            python int-objects are converted
            to the standard c++ int datatype (32bit).
            Overflows can happen because numerical limits
            of python int and c++ int datatypes differ.
            If you need to input large values write it as double.

        Returns:
            t (sly.lex.Token): token with value cast to int
        """
        try:
            t.value = int(t.value)
        except ValueError:
            # casting hex notation
            t.value = int(t.value, base=16)
        return t
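
    # Illustrative sketch (not part of the original module): "42" is cast
    # directly by int(), while "0x2A" raises ValueError on the first cast and
    # is then parsed with base=16, so both inputs yield the integer 42.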

    @_(r"[a-zA-Z_][a-zA-Z_0-9]*")  # noqa: F821
    def IDENTIFIER(self, t):
        """
        Scanning function for identifiers

        If a matched sequence equals reserved keywords of other tokens
        the token type and value is remapped via the reserved dictionary.

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                value attribute equals the matched sequence.
        """
        reserved = {
            "and": "AND",
            "or": "OR",
            "not": "NOT",
            "True": "BOOLEAN",
            "true": "BOOLEAN",
            "False": "BOOLEAN",
            "false": "BOOLEAN",
            "nan": "DOUBLE",
            "infinity": "DOUBLE",
            "inf": "DOUBLE",
        }
        # Check for reserved words
        t.type = reserved.get(t.value, "IDENTIFIER")

        # Set value to bool if BOOLEAN type was returned from reserved dict.
        if t.type == "BOOLEAN":
            t.value = t.value == "True" or t.value == "true"
        # Take care of special infinity and nan values.
        if t.type == "DOUBLE":
            t.value = float(t.value)
        if t.type == "IDENTIFIER":
            try:
                if self.text[self.index] == "(":
                    # Check that closing parenthesis exists
                    if ")" not in self.text[self.index:]:
                        raise SyntaxError("Unmatched '('")
                    else:
                        self.push_state(B2ParameterLexer)
            except IndexError:
                pass
        return t
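
    # Illustrative sketch (not part of the original module): in the input
    # "not isSignal and p < infinity" the words 'not' and 'and' are remapped
    # to NOT and AND tokens, 'infinity' becomes a DOUBLE token with value
    # float('inf'), and 'isSignal' and 'p' stay IDENTIFIER tokens. An
    # identifier directly followed by '(' additionally pushes the
    # B2ParameterLexer state, so its arguments are captured as one
    # ARGUMENTTUPLE token.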


def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.

    Parameters:
        parser_type (str): choice of parser type, 'cut' or 'expression'

    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut",
        "expression"
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"

    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:
        <cut> ::= EMPTY
            | <boolean_expression>

        <boolean_expression> ::= <disjunction>

        <disjunction> ::= <conjunction>
            | <disjunction> OR <conjunction>

        <conjunction> ::= <negation>
            | <conjunction> AND <negation>

        <negation> ::= <bracket_expression>
            | NOT <negation>

        <bracket_expression> ::= <relational_expression>
            | LBRACK <boolean_expression> RBRACK

        <relational_expression> ::= <expression>
            | <expression> <comparison_operator> <expression>
            | <expression> <comparison_operator> <expression>
              <comparison_operator> <expression>

        <comparison_operator> ::= EQUALEQUAL
            | GREATER
            | LESS
            | GREATEREQUAL
            | LESSEQUAL
            | NOTEQUAL

        <expression> ::= <sum>

        <sum> ::= <term>
            | <sum> PLUS <term>
            | <sum> MINUS <term>

        <term> ::= <factor>
            | <term> TIMES <factor>
            | <term> DIVIDE <factor>

        <factor> ::= <power>
            | PLUS <factor>
            | MINUS <factor>

        <power> ::= <primary>
            | <primary> POWER <factor>

        <primary> ::= LPAREN <expression> RPAREN
            | <function>
            | IDENTIFIER
            | INTEGER
            | BOOLEAN
            | DOUBLE

        <function> ::= IDENTIFIER ARGUMENTTUPLE
        """

        def __init__(self, verbose=False):
            """
            Initialize Parser
            @param verbose run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            ## verbose setting, creates more human readable tuple output
            ## only for testing, debugging purposes
            ## not used in production, as default of kwarg is False
            self.verbose = verbose
            ## parameter state stack
            ## used for scope detection of variables and metavariables
            self.parameter_stack = list()

        if parser_type == "cut":
            ## token list for B2Parser include cut specific tokens
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            ## token list for B2ExpressionParser exclude cut specific tokens
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)

        # Define precedence of operators starting with lowest precedence
        # first element of tuple indicates associativity of operator
        if parser_type == "cut":
            ## Precedence definition for B2Parser
            precedence = (  # noqa: F841
                ("left", "OR"),
                ("left", "AND"),
                ("nonassoc", "NOT"),
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        else:
            ## Reduced precedence definition for B2ExpressionParser
            precedence = (  # noqa: F841
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )

        ## Dict for encoding nodetypes to integers
        ## Must match enum in framework/utilities/AbstractNodes.h
        node_types = {
            "UnaryBooleanNode": 0,
            "BinaryBooleanNode": 1,
            "UnaryRelationalNode": 2,
            "BinaryRelationalNode": 3,
            "TernaryRelationalNode": 4,
            "UnaryExpressionNode": 5,
            "BinaryExpressionNode": 6,
            "FunctionNode": 7,
            "IdentifierNode": 8,
            "DoubleNode": 9,
            "IntegerNode": 10,
            "BooleanNode": 11,
        }

        ## Dict for encoding boolean operator types to integers
        ## Must match BooleanOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        b_operator_types = {
            "and": 0,
            "or": 1,
        }

        ## Dict for encoding comparison operator types to integers
        ## Must match ComparisonOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        c_operator_types = {
            "==": 0,
            ">=": 1,
            "<=": 2,
            ">": 3,
            "<": 4,
            "!=": 5,
        }

        ## Dict for encoding arithmetic operator types to integers
        ## Must match ArithmeticOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        a_operation_types = {
            "+": 0,
            "-": 1,
            "*": 2,
            "/": 3,
            "**": 4,
            "^": 4,
        }

        def get_node_type(self, node_name: str):
            """
            Return the node type integer value
            or node name if verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value
            or comparison operator name if verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]  # noqa: E501

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value
            or boolean operator name if verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]  # noqa: E501

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operator type integer value
            or arithmetic operator token if verbose setting is chosen.
            """
            return operation_name if self.verbose else self.a_operation_types[operation_name]  # noqa: E501
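
        # Illustrative sketch (not part of the original module): with
        # verbose=True these getters pass the readable names through, e.g.
        # get_node_type("BooleanNode") returns "BooleanNode"; with the default
        # verbose=False they return the integer encodings that must match
        # framework/utilities/AbstractNodes.h.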

        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal

                Grammar rules:
                    <cut> ::= EMPTY
                        | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (
                            self.get_node_type("BooleanNode"),
                            True
                        )
                    )
566 @_(r"disjunction") # noqa: F821
567 def boolean_expression(self, p):
569 Parsing function for <boolean_expression> nonterminal
572 <boolean_expression> ::= <disjunction>
576 @_(r"disjunction OR conjunction", r"conjunction") # noqa: F821
577 def disjunction(self, p):
579 Parsing function for <disjunction> nonterminal
582 <disjunction> ::= <conjunction>
583 | <disjunction> OR <conjunction>
587 self.get_node_type("BinaryBooleanNode"),
590 self.get_boper_type(p.OR),
592 except AttributeError:
595 @_(r"conjunction AND negation", r"negation") # noqa: F821
596 def conjunction(self, p):
598 Parsing function for <conjunction> nonterminal
601 <conjunction> ::= <negation>
602 | <conjunction> AND <negation>
606 self.get_node_type("BinaryBooleanNode"),
609 self.get_boper_type(p.AND),
611 except AttributeError:
614 @_(r"bracket_expression", r"NOT negation") # noqa: F821
615 def negation(self, p):
617 Parsing function for <negation> nonterminal
620 <negation> ::= <bracket_expression>
624 return p.bracket_expression
625 except AttributeError:
627 self.get_node_type("UnaryBooleanNode"),
634 r"relational_expression",
635 r"LBRACK boolean_expression RBRACK")
636 def bracket_expression(self, p):
638 Parsing function for <bracket_expression> nonterminal
641 <bracket_expression> ::= <relational_expression>
642 | LBRACK <boolean_expression> RBRACK
645 return p.relational_expression
646 except AttributeError:
648 self.get_node_type("UnaryBooleanNode"),
649 p.boolean_expression,
654 @_(r"expression") # noqa: F821
655 def relational_expression(self, p): # noqa: F811
657 Parsing function for <relational_expression> nonterminal
660 <relational_expression> ::= <expression>
663 return (self.get_node_type("UnaryRelationalNode"), p.expression)
665 @_(r"expression comparison_operator expression") # noqa: F821
666 def relational_expression(self, p): # noqa: F811
668 Parsing function for <relational_expression> nonterminal
671 <relational_expression> ::= <expression> <comparison_operator>
675 self.get_node_type("BinaryRelationalNode"),
678 self.get_coper_type(p.comparison_operator),
681 @_(r"expression comparison_operator expression comparison_operator expression") # noqa: F821, E501
682 def relational_expression(self, p): # noqa: F811
684 Parsing function for <relational_expression> nonterminal
687 <relational_expression> ::= expression> <comparison_operator>
688 <expression> <comparison_operator> <expression>
691 self.get_node_type("TernaryRelationalNode"),
695 self.get_coper_type(p.comparison_operator0),
696 self.get_coper_type(p.comparison_operator1),

            @_(r"EQUALEQUAL", r"GREATER", r"LESS",  # noqa: F821
               r"GREATEREQUAL", r"LESSEQUAL", r"NOTEQUAL")  # noqa: F821
            def comparison_operator(self, p):
                """
                Parsing function for <comparison_operator> nonterminal

                Grammar rules:
                    <comparison_operator> ::= EQUALEQUAL
                        | GREATER
                        | LESS
                        | GREATEREQUAL
                        | LESSEQUAL
                        | NOTEQUAL
                """
                return p[0]

        @_(r"sum")  # noqa: F821
        def expression(self, p):
            """
            Parsing function for <expression> nonterminal

            Grammar rule:
                <expression> ::= <sum>
            """
            return p.sum
731 @_(r"sum PLUS term", r"sum MINUS term", r"term") # noqa: F821
734 Parsing function for <sum> nonterminal
743 self.get_node_type("BinaryExpressionNode"),
746 self.get_a_operation_type(p[1]),
748 except AttributeError:
751 @_(r"term TIMES factor", r"term DIVIDE factor", r"factor") # noqa: F821, E501
754 Parsing function for <term> nonterminal
758 | <term> TIMES <factor>
759 | <term> DIVIDE <factor>
763 self.get_node_type("BinaryExpressionNode"),
766 self.get_a_operation_type(p[1]),
768 except AttributeError:
771 @_(r"power") # noqa: F821
774 Parsing function for <power> nonterminal
781 @_(r"PLUS factor") # noqa: F821
782 def factor(self, p): # noqa: F811
784 Parsing function for <factor> nonterminal
787 <factor> ::= PLUS <factor>
790 self.get_node_type("UnaryExpressionNode"),
796 @_(r"MINUS factor") # noqa: F821
797 def factor(self, p): # noqa: F811
799 Parsing function for <factor> nonterminal
802 <factor> ::= MINUS factor
805 self.get_node_type("UnaryExpressionNode"),
811 @_(r"primary") # noqa: F821
814 Parsing function for <power> nonterminal
817 <power> ::= <primary>
821 @_(r"primary POWER factor") # noqa: F821
822 def power(self, p): # noqa: F811
824 Parsing function for <power> nonterminal
827 <power> ::= <primary> POWER <factor>
830 self.get_node_type("BinaryExpressionNode"),
833 self.get_a_operation_type(p.POWER),
836 @_(r"function") # noqa: F821
837 def primary(self, p):
839 Parsing function for <primary> nonterminal
842 <primary> ::= <function>
846 @_(r"LPAREN expression RPAREN") # noqa: F821
847 def primary(self, p): # noqa: F811
849 Parsing function for <primary> nonterminal
852 <primary> ::= LPAREN <expression> RPAREN
855 self.get_node_type("UnaryExpressionNode"),
861 @_(r"INTEGER") # noqa: F821
862 def primary(self, p): # noqa: F811
864 Parsing function for <primary> nonterminal
867 <primary> ::= INTEGER
869 return (self.get_node_type("IntegerNode"), p.INTEGER)
871 @_(r"DOUBLE") # noqa: F821
872 def primary(self, p): # noqa: F811
874 Parsing function for <primary> nonterminal
879 return (self.get_node_type("DoubleNode"), p.DOUBLE)
881 @_(r"BOOLEAN") # noqa: F821
882 def primary(self, p): # noqa: F811
884 Parsing function for <primary> nonterminal
887 <primary> ::= BOOLEAN
889 return (self.get_node_type("BooleanNode"), p.BOOLEAN)
891 @_(r"IDENTIFIER") # noqa: F821
892 def primary(self, p): # noqa: F811
894 Parsing function for <primary> nonterminal
897 <primary> ::= IDENTIFIER
899 if self.parameter_stack:
901 self.get_node_type("IdentifierNode"),
906 self.get_node_type("IdentifierNode"),
910 @_(r"IDENTIFIER ARGUMENTTUPLE") # noqa: F821
911 def function(self, p):
913 Parsing function for <function> nonterminal
916 <function> ::= IDENTIFIER LPAREN <parameters> RPAREN
918 if self.parameter_stack:
920 self.get_node_type("FunctionNode"),
922 p.ARGUMENTTUPLE[1:-1],
926 self.get_node_type("FunctionNode"),
928 p.ARGUMENTTUPLE[1:-1],

        def error(self, p):
            """
            Error function, called immediately if syntax error is detected
            @param p (sly.token) offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # Get error position of offending token in cut.
                error_pos = p.index
            except AttributeError:  # syntax error at EOF, p is None
                # Set error position to length of cut minus one.
                error_pos = len(self.cut) - 1
            try:
                # Get error token type
                error_token = p.type
            except AttributeError:
                # syntax error at EOF get last token from stack
                error_token = self.symstack[-1].type

            # Format error message
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)
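
        # Illustrative sketch (not part of the original module): the raised
        # SyntaxError echoes the cut, places a caret under error_pos on the
        # following line and then names the unexpected token, so the user can
        # see where in the cut string the parse failed.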

        def parse(self, cut: str, token_generator) -> tuple:
            """
            Override the sly.Parser parse function.
            @param cut unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            ## Set cut attribute needed in case of an error.
            self.cut = cut
            return super().parse(token_generator)

    return B2ParserMixin


B2Parser = parser_class_decorator(Parser, parser_type="cut")

B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")


def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2Parser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))


def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2ExpressionParser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))
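

# Illustrative sketch (not part of the original module) of typical usage:
#   parse("nTracks > 3 and p < 2.5")            # full cut grammar
#   parse_expression("1 + 2 * daughter(0, p)")  # arithmetic-only grammar
# Both return nested tuples; with verbose=True the node types appear as
# readable names such as "BinaryExpressionNode" instead of integer encodings.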


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    args = argparser.parse_args()
    if args.expression:
        cut = input("Please input expression:\n")
        print(parse_expression(cut))
    else:
        cut = input("Please input cut:\n")
        print(parse(cut))