# Belle II Software prerelease-10-00-00a
# b2parser.py
import argparse
from sly import Lexer, Parser
11
def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Find the matching control token in string and return its offset.

    The string's first character must match openchar. Otherwise,
    or if the string is empty, 0 is returned.

    Args:
        string (str): input
        openchar (str): opening char e.g '{'
        closechar (str): closing char e.g '}'

    Returns:
        int: position of matching closing char in string, or 0 if
            string is empty or does not start with openchar.

    Raises:
        SyntaxError: if the opening char is never closed in string.
    """
    # Guard: empty input or no opening char at position 0 -> nothing to match.
    if not string or string[0] != openchar:
        return 0
    count = 1  # one scope already opened by string[0]
    end = 1
    while end < len(string) and count > 0:
        if string[end] == openchar:
            count += 1
        elif string[end] == closechar:
            count -= 1
        end += 1
    if count > 0:
        raise SyntaxError("Matched parenthesis for metavariable could not be found.")
    return end - 1
38
39
class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures
    everything within the matched parenthesis.
    Call pop_state to return to the default scanning state.
    """

    ## Only token emitted in this state: the whole argument tuple.
    tokens = {ARGUMENTTUPLE, }  # noqa: F821

    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):
        """
        Capture metavariable parameters by finding the matched parenthesis.

        Args:
            t (sly.lex.token): token of type LPAREN

        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        # Find the offset of the matching parenthesis, relative to the
        # '(' that triggered this rule (self.index already points past it).
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        # Token value is the full "(...)" slice including both parentheses.
        t.value = self.text[self.index-1: self.index+pos]
        # Advance the scanner past the captured argument tuple.
        self.index = self.index+pos
        # Return to default B2Lexer state
        self.pop_state()
        # Return token
        return t
71
72
class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """

    def __init__(self):
        """Initialize Lexer"""
        ## control_token_stack (list): stack for keeping track of seen brackets
        ## and parenthesis. Allows finding parenthesis and bracket syntax
        ## errors on scanner level.
        self.control_token_stack = list()

    ## cut specific tokens
    cut_tokens = {
        # structure tokens
        RBRACK, LBRACK,  # noqa: F821
        # boolean operators
        AND, OR, NOT,  # noqa: F821
        # comparison operators
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    ## expression tokens, also needed for cut.
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        # data types
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        # arithmetic operators
        POWER, TIMES, DIVIDE, PLUS, MINUS  # noqa: F821
    }
    ## Set of all tokens
    tokens = expression_tokens.union(cut_tokens)

    # Scanning Rules
    ## ignore spaces, tabs and newlines
    ignore = " \t\n"

    # Token definitions
    ## comma token definition as literal
    literals = {r","}

    # Comparison operator token definitions
    ## token regular expression for '=='
    EQUALEQUAL = r"=="
    ## token regular expression for '>='
    GREATEREQUAL = r">="
    ## token regular expression for '<='
    LESSEQUAL = r"<="
    ## token regular expression for '>'
    GREATER = r">"
    ## token regular expression for '<'
    LESS = r"<"
    ## token regular expression for '!='
    NOTEQUAL = r"!="

    # Arithmetic operator token definitions
    ## token regular expression for power, both '**' and '^' allowed
    POWER = r"\*\*|\^"
    ## token regular expression for '*'
    TIMES = r"\*"
    ## token regular expression for '/'
    DIVIDE = r"/"
    ## token regular expression for '+'
    PLUS = r"\+"
    ## token regular expression for '-'
    MINUS = r"-"

    # Scanning Functions for tokens which
    # require additional operations
    # regular expressions are supplied via @_ decorator

    @_(r"\[")  # noqa: F821
    def LBRACK(self, t):
        """
        Scan opening bracket.

        Parameters:
            t (sly.lex.token): token of type LBRACK

        Raises:
            SyntaxError: if no following closing bracket is found
                in the input.

        Side Effect:
            Pushes 'BRACK' onto control_token_stack

        Returns:
            sly.lex.Token
        """
        # Fail fast: a '[' without any later ']' can never be matched.
        if "]" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '[' in cut.")
        self.control_token_stack.append("BRACK")
        return t

    @_(r"\]")  # noqa: F821
    def RBRACK(self, t):
        """
        Scan closing bracket.

        Parameters:
            t (sly.lex.token): token of type RBRACK

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no bracket was opened previously.
                2. If state of control_token_stack is 'PAREN', which
                means a closing parenthesis is expected.

        Side Effect:
            Pops object from control_token_stack

        Returns:
            sly.lex.Token
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ']' in cut.")
        if state == "BRACK":
            return t
        elif state == "PAREN":
            raise SyntaxError("Illegal ']', expected ')'.")

    @_(r"\(")  # noqa: F821
    def LPAREN(self, t):
        """
        Scan opening parenthesis.

        Parameters:
            t (sly.lex.token): token of type LPAREN

        Raises:
            SyntaxError: if no following closing parenthesis is found
                in the input.

        Side Effect:
            Pushes 'PAREN' onto control_token_stack

        Returns:
            sly.lex.Token
        """
        # Fail fast: a '(' without any later ')' can never be matched.
        if ")" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '('")
        self.control_token_stack.append("PAREN")
        return t

    @_(r"\)")  # noqa: F821
    def RPAREN(self, t):
        """
        Scan closing parenthesis.

        Parameters:
            t (sly.lex.token): token of type RPAREN

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no parenthesis was opened previously.
                2. If state of control_token_stack is 'BRACK', which
                means a closing bracket is expected.

        Side Effect:
            Pops state from control_token_stack

        Returns:
            sly.lex.Token
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ')' in cut.")
        if state == "BRACK":
            raise SyntaxError("Illegal ')', expected ']'.")
        elif state == "PAREN":
            return t

    @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))")  # noqa: E501, F821
    def DOUBLE(self, t):
        """
        Scanning function for double values

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is casted to float.

        Possible notations covered by this regular expression:
            Normal decimal notation e.g 0.1
            Hanging decimal separator notation e.g 1.
            Preceding decimal separator notation e.g .1
            Scientific notation with (signed) exponents e.g 1.0E4, 1.e-4, .1E+3
            Exponents are case insensitive e.g 1.e4, 1.E4
            Integer with exponent e.g 1E4

        Returns:
            sly.lex.Token
        """
        t.value = float(t.value)
        return t

    @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+")  # noqa: F821
    def INTEGER(self, t):
        """
        Scanning function for integer values
        Allows normal and hex notation (case insensitive)

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is casted to int.

        Warning:
            python int-objects are converted
            to the standard c++ int datatype (32bit).
            Overflows can happen because numerical limits
            of python int and c++ int datatypes differ.
            If you need to input large values write it as double.

        Returns:
            sly.lex.Token
        """
        try:
            t.value = int(t.value)
        except ValueError:
            # casting hex notation
            t.value = int(t.value, base=16)
        return t

    @_(r"[a-zA-Z_][a-zA-Z_0-9]*")  # noqa: F821
    def IDENTIFIER(self, t):
        """
        Scanning function for identifiers

        If a matched sequence equals reserved keywords of other tokens
        the token type and value is remapped via the reserved dictionary.

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                value attribute equals the matched sequence.

        Returns:
            sly.lex.Token
        """
        reserved = {
            "and": "AND",
            "or": "OR",
            "not": "NOT",
            "True": "BOOLEAN",
            "true": "BOOLEAN",
            "False": "BOOLEAN",
            "false": "BOOLEAN",
            "nan": "DOUBLE",
            "infinity": "DOUBLE",
            "inf": "DOUBLE",
        }
        # Check for reserved words
        t.type = reserved.get(t.value, "IDENTIFIER")

        # Set value to bool if BOOLEAN type was returned from reserved dict.
        if t.type == "BOOLEAN":
            t.value = t.value == "True" or t.value == "true"
        # Take care of special infinity and nan values.
        if t.type == "DOUBLE":
            t.value = float(t.value)
        if t.type == "IDENTIFIER":
            try:
                # A '(' directly after an identifier starts a metavariable
                # parameter list: switch to the capturing parameter lexer.
                if self.text[self.index] == "(":
                    # Check that closing parenthesis exists
                    if ")" not in self.text[self.index:]:
                        raise SyntaxError("Unmatched '('")
                    else:
                        self.push_state(B2ParameterLexer)
            except IndexError:
                # Identifier at end of input: nothing follows it.
                pass
        return t
348
349
def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.

    Args:
        parser_type (str): choice of parser type, 'cut' or 'expression'

    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut",
        "expression",
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"

    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:
        <cut> ::= EMPTY
            | <boolean_expression>

        <boolean_expression> ::= <disjunction>

        <disjunction> ::= <conjunction>
            | <disjunction> OR <conjunction>

        <conjunction> ::= <negation>
            | <conjunction> AND <negation>

        <negation> ::= <bracket_expression>
            | NOT <negation>

        <bracket_expression> ::= <relational_expression>
            | LBRACK <boolean_expression> RBRACK

        <relational_expression> ::= <expression>
            | <expression> <comparison_operator> <expression>
            | <expression> <comparison_operator> <expression>
              <comparison_operator> <expression>

        <comparison_operator> ::= EQUALEQUAL
            | GREATER
            | LESS
            | GREATEREQUAL
            | LESSEQUAL
            | NOTEQUAL

        <expression> ::= <sum>

        <sum> ::= <term>
            | <sum> PLUS <term>
            | <sum> MINUS <term>

        <term> ::= <factor>
            | <term> TIMES <factor>
            | <term> DIVIDE <factor>

        <factor> ::= <power>
            | PLUS <factor>
            | MINUS <factor>

        <power> ::= <primary>
            | <primary> POWER <factor>

        <primary> ::= LPAREN <expression> RPAREN
            | <function>
            | IDENTIFIER
            | INTEGER
            | BOOLEAN
            | DOUBLE

        <function> ::= IDENTIFIER ARGUMENTTUPLE
        """

        def __init__(self, verbose=False):
            """
            Initialize Parser
            @param verbose run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            ## verbose setting, creates more human readable tuple output
            ## only for testing, debugging purposes
            ## not used in production, as default of kwarg is False
            self.verbose = verbose
            ## parameter state stack
            ## used for scope detection of variables and metavariables
            self.parameter_stack = list()

        if parser_type == "cut":
            ## token list for B2Parser include cut specific tokens
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            ## token list for B2ExpressionParser exclude cut specific tokens
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)
        ## start symbol of the grammar, 'cut' or 'expression'
        start = parser_type
        # Define precedence of operators starting with lowest precedence
        # first element of tuple indicates associativity of operator
        if parser_type == "cut":
            ## Precedence definition for B2Parser
            precedence = (  # noqa: F841
                ("left", "OR"),
                ("left", "AND"),
                ("nonassoc", "NOT"),
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        else:
            ## Reduced precedence definition for B2ExpressionParser
            precedence = (  # noqa: F841
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        ## Dict for encoding nodetypes to integers
        ## Must match enum in framework/utilities/AbstractNodes.h
        node_types = {
            "UnaryBooleanNode": 0,
            "BinaryBooleanNode": 1,
            "UnaryRelationalNode": 2,
            "BinaryRelationalNode": 3,
            "TernaryRelationalNode": 4,
            "UnaryExpressionNode": 5,
            "BinaryExpressionNode": 6,
            "FunctionNode": 7,
            "IdentifierNode": 8,
            "DoubleNode": 9,
            "IntegerNode": 10,
            "BooleanNode": 11,
        }

        ## Dict for encoding boolean operator types to integers
        ## Must match BooleanOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        b_operator_types = {
            "and": 0,
            "or": 1,
        }

        ## Dict for encoding comparison operator types to integers
        ## Must match ComparisonOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        c_operator_types = {
            "==": 0,
            ">=": 1,
            "<=": 2,
            ">": 3,
            "<": 4,
            "!=": 5,
        }

        ## Dict for encoding arithmetic operator types to integers
        ## Must match ArithmeticOperator enum in framework/utilities/AbstractNodes.h  # noqa: E501
        a_operation_types = {
            "+": 0,
            "-": 1,
            "*": 2,
            "/": 3,
            "**": 4,
            "^": 4
        }

        def get_node_type(self, node_name: str):
            """
            Return the node type integer value
            or node name if verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value
            or comparison operator name if verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]  # noqa: E501

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value
            or boolean operator name if verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]  # noqa: E501

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operator type integer value
            or arithmetic operator token if verbose setting is chosen.
            """
            return operation_name if self.verbose else self.a_operation_types[operation_name]  # noqa: E501

        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal

                Grammar rules:
                    <cut> ::= EMPTY
                        | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    # Empty cut: encode an always-true boolean node.
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (
                            self.get_node_type("BooleanNode"),
                            True
                        )
                    )

            @_(r"disjunction")  # noqa: F821
            def boolean_expression(self, p):
                """
                Parsing function for <boolean_expression> nonterminal

                Grammar rule:
                    <boolean_expression> ::= <disjunction>
                """
                return p.disjunction

            @_(r"disjunction OR conjunction", r"conjunction")  # noqa: F821
            def disjunction(self, p):
                """
                Parsing function for <disjunction> nonterminal

                Grammar rules:
                    <disjunction> ::= <conjunction>
                        | <disjunction> OR <conjunction>
                """
                try:
                    return (
                        self.get_node_type("BinaryBooleanNode"),
                        p.disjunction,
                        p.conjunction,
                        self.get_boper_type(p.OR),
                    )
                except AttributeError:
                    # Single-production alternative: just pass through.
                    return p.conjunction

            @_(r"conjunction AND negation", r"negation")  # noqa: F821
            def conjunction(self, p):
                """
                Parsing function for <conjunction> nonterminal

                Grammar rules:
                    <conjunction> ::= <negation>
                        | <conjunction> AND <negation>
                """
                try:
                    return (
                        self.get_node_type("BinaryBooleanNode"),
                        p.conjunction,
                        p.negation,
                        self.get_boper_type(p.AND),
                    )
                except AttributeError:
                    return p.negation

            @_(r"bracket_expression", r"NOT negation")  # noqa: F821
            def negation(self, p):
                """
                Parsing function for <negation> nonterminal

                Grammar rules:
                    <negation> ::= <bracket_expression>
                        | NOT <negation>
                """
                try:
                    return p.bracket_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryBooleanNode"),
                        p.negation,
                        True,
                        False,
                    )

            @_(  # noqa: F821
                r"relational_expression",
                r"LBRACK boolean_expression RBRACK")
            def bracket_expression(self, p):
                """
                Parsing function for <bracket_expression> nonterminal

                Grammar rules:
                    <bracket_expression> ::= <relational_expression>
                        | LBRACK <boolean_expression> RBRACK
                """
                try:
                    return p.relational_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryBooleanNode"),
                        p.boolean_expression,
                        False,
                        True,
                    )

            @_(r"expression")  # noqa: F821
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression>
                """
                return (self.get_node_type("UnaryRelationalNode"), p.expression)

            @_(r"expression comparison_operator expression")  # noqa: F821
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression>
                        <comparison_operator> <expression>
                """
                return (
                    self.get_node_type("BinaryRelationalNode"),
                    p.expression0,
                    p.expression1,
                    self.get_coper_type(p.comparison_operator),
                )

            @_(r"expression comparison_operator expression comparison_operator expression")  # noqa: F821, E501
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression> <comparison_operator>
                        <expression> <comparison_operator> <expression>
                """
                return (
                    self.get_node_type("TernaryRelationalNode"),
                    p.expression0,
                    p.expression1,
                    p.expression2,
                    self.get_coper_type(p.comparison_operator0),
                    self.get_coper_type(p.comparison_operator1),
                )

            @_(  # noqa: F821
                r"EQUALEQUAL",
                r"GREATER",
                r"LESS",
                r"GREATEREQUAL",
                r"LESSEQUAL",
                r"NOTEQUAL",
            )
            def comparison_operator(self, p):
                """
                Parsing function for <comparison_operator> nonterminal

                Grammar rules:
                    <comparison_operator> ::= EQUALEQUAL
                        | GREATER
                        | LESS
                        | GREATEREQUAL
                        | LESSEQUAL
                        | NOTEQUAL
                """
                return p[0]

        @_(r"sum")  # noqa: F821
        def expression(self, p):
            """
            Parsing function for <expression> nonterminal

            Grammar rule:
                <expression> ::= <sum>
            """
            return p.sum

        @_(r"sum PLUS term", r"sum MINUS term", r"term")  # noqa: F821
        def sum(self, p):
            """
            Parsing function for <sum> nonterminal

            Grammar rules:
                <sum> ::= <term>
                    | <sum> PLUS <term>
                    | <sum> MINUS <term>
            """
            try:
                return (
                    self.get_node_type("BinaryExpressionNode"),
                    p.sum,
                    p.term,
                    self.get_a_operation_type(p[1]),
                )
            except AttributeError:
                return p.term

        @_(r"term TIMES factor", r"term DIVIDE factor", r"factor")  # noqa: F821, E501
        def term(self, p):
            """
            Parsing function for <term> nonterminal

            Grammar rules:
                <term> ::= <factor>
                    | <term> TIMES <factor>
                    | <term> DIVIDE <factor>
            """
            try:
                return (
                    self.get_node_type("BinaryExpressionNode"),
                    p.term,
                    p.factor,
                    self.get_a_operation_type(p[1]),
                )
            except AttributeError:
                return p.factor

        @_(r"power")  # noqa: F821
        def factor(self, p):
            """
            Parsing function for <factor> nonterminal

            Grammar rule:
                <factor> ::= <power>
            """
            return p.power

        @_(r"PLUS factor")  # noqa: F821
        def factor(self, p):  # noqa: F811
            """
            Parsing function for <factor> nonterminal

            Grammar rule:
                <factor> ::= PLUS <factor>
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor,
                False,
                False,
            )

        @_(r"MINUS factor")  # noqa: F821
        def factor(self, p):  # noqa: F811
            """
            Parsing function for <factor> nonterminal

            Grammar rule:
                <factor> ::= MINUS <factor>
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor,
                True,
                False,
            )

        @_(r"primary")  # noqa: F821
        def power(self, p):
            """
            Parsing function for <power> nonterminal

            Grammar rule:
                <power> ::= <primary>
            """
            return p.primary

        @_(r"primary POWER factor")  # noqa: F821
        def power(self, p):  # noqa: F811
            """
            Parsing function for <power> nonterminal

            Grammar rule:
                <power> ::= <primary> POWER <factor>
            """
            return (
                self.get_node_type("BinaryExpressionNode"),
                p.primary,
                p.factor,
                self.get_a_operation_type(p.POWER),
            )

        @_(r"function")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= <function>
            """
            return p.function

        @_(r"LPAREN expression RPAREN")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= LPAREN <expression> RPAREN
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.expression,
                False,
                True,
            )

        @_(r"INTEGER")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= INTEGER
            """
            return (self.get_node_type("IntegerNode"), p.INTEGER)

        @_(r"DOUBLE")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= DOUBLE
            """
            return (self.get_node_type("DoubleNode"), p.DOUBLE)

        @_(r"BOOLEAN")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= BOOLEAN
            """
            return (self.get_node_type("BooleanNode"), p.BOOLEAN)

        @_(r"IDENTIFIER")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= IDENTIFIER
            """
            # NOTE(review): the previous if/else on self.parameter_stack had
            # two byte-identical branches, so it was collapsed to one return.
            return (
                self.get_node_type("IdentifierNode"),
                p.IDENTIFIER,
            )

        @_(r"IDENTIFIER ARGUMENTTUPLE")  # noqa: F821
        def function(self, p):
            """
            Parsing function for <function> nonterminal

            Grammar rule:
                <function> ::= IDENTIFIER ARGUMENTTUPLE
            """
            # NOTE(review): the previous if/else on self.parameter_stack had
            # two byte-identical branches, so it was collapsed to one return.
            # The slice strips the outer parentheses from the argument tuple.
            return (
                self.get_node_type("FunctionNode"),
                p.IDENTIFIER,
                p.ARGUMENTTUPLE[1:-1],
            )

        def error(self, p):
            """
            Error function, called immediately if syntax error is detected
            @param p (sly.token) offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # Get error position of offending token in cut.
                error_pos = p.index
            except AttributeError:  # syntax error at EOF, p is None
                # Set error position to length of cut minus one.
                error_pos = len(self.cut) - 1
            try:
                # Get error token type
                error_token = p.type
            except AttributeError:
                # syntax error at EOF get last token from stack
                error_token = self.symstack[-1].type

            # Format error message
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)

        def parse(self, cut: str, token_generator) -> tuple:
            """
            Overwrite sly.Parser parse function.
            @param cut unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            ## Set cut attribute needed in case of an error.
            self.cut = cut
            return super().parse(token_generator)

    return B2ParserMixin
968
969
## Parser class object for cuts, built with the cut-specific grammar rules.
B2Parser = parser_class_decorator(Parser, parser_type="cut")

## Parser class object for plain expressions, without cut-specific rules.
B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")
973
974
def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    # Tokenize with the cut lexer and hand the stream to the cut parser.
    return B2Parser(verbose).parse(cut, B2Lexer().tokenize(cut))
985
986
def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    # Tokenize with the shared lexer and hand the stream to the
    # expression-only parser.
    return B2ExpressionParser(verbose).parse(cut, B2Lexer().tokenize(cut))
997
998
if __name__ == "__main__":
    # Small interactive driver: '-e' switches from cut to expression parsing.
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    args = argparser.parse_args()
    if args.expression:
        print(parse_expression(input("Please input expression:\n")))
    else:
        print(parse(input("Please input cut:\n")))