import argparse

from sly import Lexer, Parser
def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Finds matching control token in string and returns the offset.
    The string's first character must match openchar.
    Otherwise, 0 is returned.

    Args:
        string (str): input string
        openchar (str): opening char e.g '{'
        closechar (str): closing char e.g '}'

    Returns:
        int: position of matching closing char in string.
    """
    end = 1
    if string[0] == openchar:
        count = 1
        while end < len(string) and count > 0:
            if string[end] == openchar:
                count += 1
            elif string[end] == closechar:
                count -= 1
            end += 1
        if count > 0:
            raise SyntaxError(
                "Matched parenthesis for metavariable could not be found.")
    return end - 1
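
# Illustrative example: the counter is incremented on every '(' and
# decremented on every ')', so
#   findMatchedParenthesis("(a, b(c))", "(", ")")  # -> 8, offset of final ')'
#   findMatchedParenthesis("a, b(c)", "(", ")")    # -> 0, no leading '('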


class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures
    everything within the matched parenthesis.
    Call pop_state to return to the default scanning state.
    """
    ## This lexer emits a single token type for the captured argument tuple
    tokens = {ARGUMENTTUPLE, }  # noqa: F821

    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):  # noqa: F821
        """
        Capture metavariable parameters by finding the matched parenthesis.
        Args:
            t (sly.lex.token): token of type LPAREN
        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        # Find the offset of the matching closing parenthesis
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        # Capture the whole parenthesized argument tuple as the token value
        t.value = self.text[self.index-1: self.index+pos]
        ## Increment current scanning position
        self.index = self.index + pos
        # Return control to the default scanning state
        self.pop_state()
        return t
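        # Illustrative (assumed behavior): while scanning "daughter(0, p) > 1"
        # the default lexer pushes this state right after "daughter", and
        # ARGUMENTTUPLE captures the literal text "(0, p)" before pop_state()
        # hands control back.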


class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """
80 """Initialize Lexer"""
81 ## control_token_stack (list): stack for keeping track of seen brackets
82 ## and parenthesis. Allows finding parenthesis and bracket syntax
83 ## errors on scanner level.
84 self.control_token_stack = list()
    ## cut specific tokens
    cut_tokens = {
        # structure tokens
        RBRACK, LBRACK,  # noqa: F821
        # boolean operators
        AND, OR, NOT,  # noqa: F821
        # comparison operators
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    ## expression tokens, also needed for cut.
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        # data types
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        # arithmetic operators
        POWER, TIMES, DIVIDE, PLUS, MINUS  # noqa: F821
    }
    ## Set of all tokens
    tokens = expression_tokens.union(cut_tokens)
    ## ignore spaces tabs and newlines
    ignore = " \t\n"
    ## comma token definition as literal
    literals = {r","}

    # Comparison operator token definitions
    ## token regular expression for '=='
    EQUALEQUAL = r"=="
    ## token regular expression for '>='
    GREATEREQUAL = r">="
    ## token regular expression for '<='
    LESSEQUAL = r"<="
    ## token regular expression for '>'
    GREATER = r">"
    ## token regular expression for '<'
    LESS = r"<"
    ## token regular expression for '!='
    NOTEQUAL = r"!="

    # Arithmetic operator token definitions
    ## token regular expression for power, both '**' and '^' allowed
    POWER = r"\*\*|\^"
    ## token regular expression for '*'
    TIMES = r"\*"
    ## token regular expression for '/'
    DIVIDE = r"/"
    ## token regular expression for '+'
    PLUS = r"\+"
    ## token regular expression for '-'
    MINUS = r"-"
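    # Illustrative (assumed token stream): tokenizing "p**2 > 4" yields
    # IDENTIFIER('p'), POWER('**'), INTEGER(2), GREATER('>'), INTEGER(4);
    # '^' is scanned as POWER as well, so "p^2 > 4" tokenizes identically.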
    # Scanning Functions for tokens which
    # require additional operations.
    # Regular expressions are supplied via the @_ decorator.
145 @_(r"\[") # noqa: F821
148 Scan opening bracket.
151 t (sly.lex.token): token of type LBRACK
154 SyntaxError: if no following closing bracket is found
158 Pushes 'BRACK' onto control_token_stack
163 # \cond false positive doxygen warning
164 if "]" not in self.text[self.index:]:
165 raise SyntaxError("Unmatched '[' in cut.")
166 self.control_token_stack.append("BRACK")
170 @_(r"\]") # noqa: F821
173 Scan closing bracket.
176 t (sly.lex.token): token of type RBRACK
179 SyntaxError: 1. If control_token_stack is empty, which means
180 no bracket was opened previously.
181 2. If state of control_token_stack is 'PAREN', which
182 means a closing parenthesis is expected.
185 Pops object from control_token_stack
191 state = self.control_token_stack.pop()
192 except IndexError: # pop from empty list
193 raise SyntaxError("Unmatched ']' in cut.")
196 elif state == "PAREN":
197 raise SyntaxError("Illegal ']', expected ')'.")
199 @_(r"\(") # noqa: F821
202 Scan opening parenthesis.
205 t (sly.lex.token): token of type LPAREN
208 SyntaxError: if no following closing parenthesis is found
212 Pushes 'PAREN' onto control_token_stack
217 # \cond false positive doxygen warning
218 if ")" not in self.text[self.index:]:
219 raise SyntaxError("Unmatched '('")
220 self.control_token_stack.append("PAREN")
224 @_(r"\)") # noqa: F821
227 Scan closing parenthesis.
230 t (sly.lex.token): token of type RPAREN
233 SyntaxError: 1. If control_token_stack is empty, which means
234 no parenthesis was opened previously.
235 2. If state of control_token_stack is 'BRACK', which
236 means a closing bracket is expected.
239 Pops state from control_token_stack
245 state = self.control_token_stack.pop()
246 except IndexError: # pop from empty list
247 raise SyntaxError("Unmatched ')' in cut.")
249 raise SyntaxError("Illegal ')', expected ']'.")
250 elif state == "PAREN":
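    # Illustrative: scanning "[ p > 1 )" pushes 'BRACK' for '[', so the later
    # ')' pops 'BRACK' and raises SyntaxError("Illegal ')', expected ']'.")
    # before the token stream ever reaches the parser.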
253 @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))") # noqa: E501, F821
256 Scanning function for double values
259 t (sly.lex.Token): initial token generated by the scanner library.
260 The value attribute is of type str initially, equals
261 the matched sequence and is casted to float.
263 Possible notations covered by this regular expression:
264 Normal decimal notation e.g 0.1
265 Hanging decimal separator notation e.g 1.
266 Preceding decimal separator notation e.g .1
267 Scientific notation with (signed) exponents e.g 1.0E4, 1.e-4, .1E+3
268 Exponents are case insensitive e.g 1.e4, 1.E4
269 Integer with exponent e.g 1E4
274 t.value = float(t.value)
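    # Illustrative inputs covered by the notations above: "0.1", "1.", ".1",
    # "1.0E4", "1.e-4", ".1E+3" and "1E4" all become float token values,
    # e.g. float("1.e-4") == 1e-4.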
277 @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+") # noqa: F821
278 def INTEGER(self, t):
280 Scanning function for integer values
281 Allows normal and hex notation (case insensitive)
284 t (sly.lex.Token): initial token generated by the scanner library.
285 The value attribute is of type str initially, equals
286 the matched sequence and is casted to int.
289 python int-objects are converted
290 to the standard c++ int datatype (32bit).
291 Overflows can happen because numerical limits
292 of python int and c++ int datatypes differ.
293 If you need to input large values write it as double.
299 t.value = int(t.value)
301 # casting hex notation
302 t.value = int(t.value, base=16)
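    # Illustrative: "42" is converted by the first cast, while "0xFF" raises
    # ValueError there and is parsed with base=16, giving 255. Values beyond
    # the 32 bit C++ int range (about 2.1e9) may overflow downstream.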
305 @_(r"[a-zA-Z_][a-zA-Z_0-9]*") # noqa: F821
306 def IDENTIFIER(self, t):
308 Scanning function for identifiers
310 If a matched sequence equals reserved keywords of other tokens
311 the token type and value is remapped via the reserved dictionary.
314 t (sly.lex.Token): initial token generated by the scanner library.
315 value attribute equals the matched sequence.
329 "infinity": "DOUBLE",
332 # Check for reserved words
333 t.type = reserved.get(t.value, "IDENTIFIER")
335 # Set value to bool if BOOLEAN type was returned from reserved dict.
336 if t.type == "BOOLEAN":
337 t.value = t.value == "True" or t.value == "true"
338 # Take care of special infinity and nan values.
339 if t.type == "DOUBLE":
340 t.value = float(t.value)
341 # \cond false positive doxygen warning
342 if t.type == "IDENTIFIER":
344 if self.text[self.index] == "(":
345 # Check that closing parenthesis exists
346 if ")" not in self.text[self.index:]:
347 raise SyntaxError("Unmatched '('")
349 self.push_state(B2ParameterLexer)
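    # Illustrative (assumed remapping): "nan" and "infinity" come back as
    # DOUBLE tokens holding float values, "true"/"True" as BOOLEAN True, and
    # an identifier directly followed by '(' switches to B2ParameterLexer so
    # its argument tuple is captured verbatim.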


def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.
    Args:
        parser_type (str): choice of parser type, 'cut' or 'expression'
    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut", "expression"
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"

    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:
            <cut> ::= EMPTY
                | <boolean_expression>
            <boolean_expression> ::= <disjunction>
            <disjunction> ::= <conjunction>
                | <disjunction> OR <conjunction>
            <conjunction> ::= <negation>
                | <conjunction> AND <negation>
            <negation> ::= <bracket_expression>
                | NOT <negation>
            <bracket_expression> ::= <relational_expression>
                | LBRACK <boolean_expression> RBRACK
            <relational_expression> ::= <expression>
                | <expression> <comparison_operator> <expression>
                | <expression> <comparison_operator> <expression>
                  <comparison_operator> <expression>
            <comparison_operator> ::= EQUALEQUAL | GREATER | LESS
                | GREATEREQUAL | LESSEQUAL | NOTEQUAL
            <expression> ::= <sum>
            <sum> ::= <term> | <sum> PLUS <term> | <sum> MINUS <term>
            <term> ::= <factor> | <term> TIMES <factor> | <term> DIVIDE <factor>
            <factor> ::= <power> | PLUS <factor> | MINUS <factor>
            <power> ::= <primary> | <primary> POWER <factor>
            <primary> ::= LPAREN <expression> RPAREN | <function>
                | IDENTIFIER | INTEGER | BOOLEAN | DOUBLE
            <function> ::= IDENTIFIER ARGUMENTTUPLE
        """
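        # Illustrative: a cut such as "[p > 1 and E < 5.2] or nTracks == 0"
        # exercises the <bracket_expression>, <conjunction>, <disjunction>
        # and <relational_expression> productions listed above.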

        def __init__(self, verbose=False):
            """
            Initialize Parser
            @param verbose  run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            ## verbose setting, creates more human readable tuple output
            ## only for testing, debugging purposes
            ## not used in production, as default of kwarg is False
            self.verbose = verbose
            ## parameter state stack
            ## used for scope detection of variables and metavariables
            self.parameter_stack = list()
        if parser_type == "cut":
            ## token list for B2Parser includes cut specific tokens
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            ## token list for B2ExpressionParser excludes cut specific tokens
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)
        # Define precedence of operators starting with lowest precedence.
        # First element of each tuple indicates associativity of the operator.
        if parser_type == "cut":
            ## Precedence definition for B2Parser
            precedence = (  # noqa: F841
                ("left", "OR"),
                ("left", "AND"),
                ("nonassoc", "NOT"),
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        else:
            ## Reduced precedence definition for B2ExpressionParser
            precedence = (  # noqa: F841
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        ## Dict for encoding nodetypes to integers
        ## Must match enum in framework/utilities/AbstractNodes.h
        node_types = {
            "UnaryBooleanNode": 0,
            "BinaryBooleanNode": 1,
            "UnaryRelationalNode": 2,
            "BinaryRelationalNode": 3,
            "TernaryRelationalNode": 4,
            "UnaryExpressionNode": 5,
            "BinaryExpressionNode": 6,
            "FunctionNode": 7,
            "IdentifierNode": 8,
            "DoubleNode": 9,
            "IntegerNode": 10,
            "BooleanNode": 11,
        }
        ## Dict for encoding boolean operator types to integers
        ## Must match BooleanOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        b_operator_types = {"and": 0, "or": 1}

        ## Dict for encoding comparison operator types to integers
        ## Must match ComparisonOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        c_operator_types = {
            "==": 0, ">=": 1, "<=": 2, ">": 3, "<": 4, "!=": 5,
        }

        ## Dict for encoding arithmetic operator types to integers
        ## Must match ArithmeticOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        a_operation_types = {
            "+": 0, "-": 1, "*": 2, "/": 3, "**": 4, "^": 4,
        }
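        # Illustrative: for the cut "p + 1 < 3" the '+' is encoded via
        # a_operation_types and the '<' via c_operator_types in the emitted
        # tuple; verbose=True keeps the readable operator and node names.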

        def get_node_type(self, node_name: str):
            """
            Return the node type integer value
            or node name if verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value
            or comparison operator name if verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]  # noqa: E501

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value
            or boolean operator name if verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]  # noqa: E501

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operator type integer value
            or arithmetic operator token if verbose setting is chosen.
            """
            return operation_name if self.verbose else self.a_operation_types[operation_name]  # noqa: E501

        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal
                Grammar rule:
                    <cut> ::= EMPTY
                        | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    # an empty cut accepts everything: boolean constant True
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (self.get_node_type("BooleanNode"), True),
                    )
572 @_(r"disjunction") # noqa: F821
573 def boolean_expression(self, p):
575 Parsing function for <boolean_expression> nonterminal
578 <boolean_expression> ::= <disjunction>
582 @_(r"disjunction OR conjunction", r"conjunction") # noqa: F821
583 def disjunction(self, p):
585 Parsing function for <disjunction> nonterminal
588 <disjunction> ::= <conjunction>
589 | <disjunction> OR <conjunction>
593 self.get_node_type("BinaryBooleanNode"),
596 self.get_boper_type(p.OR),
598 except AttributeError:
601 @_(r"conjunction AND negation", r"negation") # noqa: F821
602 def conjunction(self, p):
604 Parsing function for <conjunction> nonterminal
607 <conjunction> ::= <negation>
608 | <conjunction> AND <negation>
612 self.get_node_type("BinaryBooleanNode"),
615 self.get_boper_type(p.AND),
617 except AttributeError:
620 @_(r"bracket_expression", r"NOT negation") # noqa: F821
621 def negation(self, p):
623 Parsing function for <negation> nonterminal
626 <negation> ::= <bracket_expression>
630 return p.bracket_expression
631 except AttributeError:
633 self.get_node_type("UnaryBooleanNode"),
640 r"relational_expression",
641 r"LBRACK boolean_expression RBRACK")
642 def bracket_expression(self, p):
644 Parsing function for <bracket_expression> nonterminal
647 <bracket_expression> ::= <relational_expression>
648 | LBRACK <boolean_expression> RBRACK
651 return p.relational_expression
652 except AttributeError:
654 self.get_node_type("UnaryBooleanNode"),
655 p.boolean_expression,
660 @_(r"expression") # noqa: F821
661 def relational_expression(self, p): # noqa: F811
663 Parsing function for <relational_expression> nonterminal
666 <relational_expression> ::= <expression>
669 return (self.get_node_type("UnaryRelationalNode"), p.expression)
671 @_(r"expression comparison_operator expression") # noqa: F821
672 def relational_expression(self, p): # noqa: F811
674 Parsing function for <relational_expression> nonterminal
677 <relational_expression> ::= <expression> <comparison_operator>
681 self.get_node_type("BinaryRelationalNode"),
684 self.get_coper_type(p.comparison_operator),
687 @_(r"expression comparison_operator expression comparison_operator expression") # noqa: F821, E501
688 def relational_expression(self, p): # noqa: F811
690 Parsing function for <relational_expression> nonterminal
693 <relational_expression> ::= expression> <comparison_operator>
694 <expression> <comparison_operator> <expression>
697 self.get_node_type("TernaryRelationalNode"),
701 self.get_coper_type(p.comparison_operator0),
702 self.get_coper_type(p.comparison_operator1),
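            # Illustrative: a chained comparison such as "1 < E < 5" reduces
            # to a single TernaryRelationalNode instead of two binary
            # comparisons, mirroring the mathematical reading of the cut.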

            @_(r"EQUALEQUAL", r"GREATER", r"LESS",
               r"GREATEREQUAL", r"LESSEQUAL", r"NOTEQUAL")  # noqa: F821
            def comparison_operator(self, p):
                """
                Parsing function for <comparison_operator> nonterminal
                Grammar rule:
                    <comparison_operator> ::= EQUALEQUAL | GREATER | LESS
                        | GREATEREQUAL | LESSEQUAL | NOTEQUAL
                """
                return p[0]
727 @_(r"sum") # noqa: F821
728 def expression(self, p):
730 Parsing function for <expression> nonterminal
733 <expression> ::= <sum>
737 @_(r"sum PLUS term", r"sum MINUS term", r"term") # noqa: F821
740 Parsing function for <sum> nonterminal
749 self.get_node_type("BinaryExpressionNode"),
752 self.get_a_operation_type(p[1]),
754 except AttributeError:
757 @_(r"term TIMES factor", r"term DIVIDE factor", r"factor") # noqa: F821, E501
760 Parsing function for <term> nonterminal
764 | <term> TIMES <factor>
765 | <term> DIVIDE <factor>
769 self.get_node_type("BinaryExpressionNode"),
772 self.get_a_operation_type(p[1]),
774 except AttributeError:
777 @_(r"power") # noqa: F821
780 Parsing function for <power> nonterminal
787 @_(r"PLUS factor") # noqa: F821
788 def factor(self, p): # noqa: F811
790 Parsing function for <factor> nonterminal
793 <factor> ::= PLUS <factor>
796 self.get_node_type("UnaryExpressionNode"),
802 @_(r"MINUS factor") # noqa: F821
803 def factor(self, p): # noqa: F811
805 Parsing function for <factor> nonterminal
808 <factor> ::= MINUS factor
811 self.get_node_type("UnaryExpressionNode"),
817 @_(r"primary") # noqa: F821
820 Parsing function for <power> nonterminal
823 <power> ::= <primary>
827 @_(r"primary POWER factor") # noqa: F821
828 def power(self, p): # noqa: F811
830 Parsing function for <power> nonterminal
833 <power> ::= <primary> POWER <factor>
836 self.get_node_type("BinaryExpressionNode"),
839 self.get_a_operation_type(p.POWER),
842 @_(r"function") # noqa: F821
843 def primary(self, p):
845 Parsing function for <primary> nonterminal
848 <primary> ::= <function>
852 @_(r"LPAREN expression RPAREN") # noqa: F821
853 def primary(self, p): # noqa: F811
855 Parsing function for <primary> nonterminal
858 <primary> ::= LPAREN <expression> RPAREN
861 self.get_node_type("UnaryExpressionNode"),
867 @_(r"INTEGER") # noqa: F821
868 def primary(self, p): # noqa: F811
870 Parsing function for <primary> nonterminal
873 <primary> ::= INTEGER
875 return (self.get_node_type("IntegerNode"), p.INTEGER)
877 @_(r"DOUBLE") # noqa: F821
878 def primary(self, p): # noqa: F811
880 Parsing function for <primary> nonterminal
885 return (self.get_node_type("DoubleNode"), p.DOUBLE)
887 @_(r"BOOLEAN") # noqa: F821
888 def primary(self, p): # noqa: F811
890 Parsing function for <primary> nonterminal
893 <primary> ::= BOOLEAN
895 return (self.get_node_type("BooleanNode"), p.BOOLEAN)
897 @_(r"IDENTIFIER") # noqa: F821
898 def primary(self, p): # noqa: F811
900 Parsing function for <primary> nonterminal
903 <primary> ::= IDENTIFIER
905 if self.parameter_stack:
907 self.get_node_type("IdentifierNode"),
912 self.get_node_type("IdentifierNode"),
916 @_(r"IDENTIFIER ARGUMENTTUPLE") # noqa: F821
917 def function(self, p):
919 Parsing function for <function> nonterminal
922 <function> ::= IDENTIFIER LPAREN <parameters> RPAREN
924 if self.parameter_stack:
926 self.get_node_type("FunctionNode"),
928 p.ARGUMENTTUPLE[1:-1],
932 self.get_node_type("FunctionNode"),
934 p.ARGUMENTTUPLE[1:-1],

        def error(self, p):
            """
            Error function, called immediately if syntax error is detected
            @param p (sly.token)  offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # Get error position of offending token in cut.
                error_pos = p.index
            except AttributeError:  # syntax error at EOF, p is None
                # Set error position to length of cut minus one.
                error_pos = len(self.cut) - 1
            try:
                # Get error token type
                error_token = p.type
            except AttributeError:
                # syntax error at EOF get last token from stack
                error_token = self.symstack[-1].type
            # Format error message
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)

        def parse(self, cut: str, token_generator) -> tuple:
            """
            Overwrite sly.Parser parse function.
            @param cut  unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator  generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            ## Set cut attribute needed in case of an error.
            self.cut = cut
            return super().parse(token_generator)

    return B2ParserMixin


B2Parser = parser_class_decorator(Parser, parser_type="cut")

B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")


def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut  cut string which should be parsed
    @param verbose  provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2Parser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))
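
# Illustrative usage (output shape depends on the encodings defined above):
#   parse("E < 5.2")                # syntax-tree tuple with integer node codes
#   parse("E < 5.2", verbose=True)  # same tree with readable node-type names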


def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut  cut string which should be parsed
    @param verbose  provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2ExpressionParser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    args = argparser.parse_args()
    if args.expression:
        cut = input("Please input expression:\n")
        print(parse_expression(cut))
    else:
        cut = input("Please input cut:\n")
        print(parse(cut))