# Belle II Software development
# b2parser.py
8import argparse
9from sly import Lexer, Parser
10
11
def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Finds matching control token in string and returns the offset.
    The string's first character must match openchar.
    Otherwise, 0 is returned.

    Args:
        string (str): input
        openchar (str): opening char e.g '{'
        closechar (str): closing char e.g '}'

    Returns:
        int: position of matching closing char in string.

    Raises:
        SyntaxError: if the first character is openchar but no
            matching closing char is found in the string.
    """
    end = 1
    # Guard against empty input: an empty string has no opening char, so
    # the documented fallback offset 0 is returned instead of raising an
    # IndexError on string[0].
    if string and string[0] == openchar:
        count = 1
        # Scan forward, tracking nesting depth until it drops back to zero.
        while end < len(string) and count > 0:
            if string[end] == openchar:
                count += 1
            elif string[end] == closechar:
                count -= 1
            end += 1
        if count > 0:
            raise SyntaxError("Matched parenthesis for metavariable could not be found.")
    return end - 1
38
39
class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures
    everything within the matched parenthesis.
    Call pop_state to return to the default scanning state.
    """

    ## This lexer state emits a single token type: the complete
    ## parenthesised argument tuple of a metavariable.
    tokens = {ARGUMENTTUPLE, }  # noqa: F821

    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):
        """
        Capture metavariable parameters by finding the matched parenthesis.

        Args:
            t (sly.lex.token): token of type LPAREN

        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        # Find the offset of the matching parenthesis
        # (self.index already points past the matched '(' here).
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        # Set value to slice, including both enclosing parentheses.
        t.value = self.text[self.index-1: self.index+pos]
        # Skip the scanner past the captured argument tuple.
        self.index = self.index+pos
        # Return to default B2Lexer state
        self.pop_state()
        # Return token
        return t
72
class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """

    def __init__(self):
        """Initialize Lexer"""
        ## control_token_stack (list): stack for keeping track of seen brackets
        ## and parenthesis. Allows finding parenthesis and bracket syntax
        ## errors on scanner level.
        self.control_token_stack = list()

    ## cut specific tokens
    cut_tokens = {
        # structure tokens
        RBRACK, LBRACK,  # noqa: F821
        # boolean operators
        AND, OR, NOT,  # noqa: F821
        # comparison operators
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    ## expression tokens, also needed for cut.
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        # data types
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        # arithmetic operators
        POWER, TIMES, DIVIDE, PLUS, MINUS  # noqa: F821
    }
    ## Set of all tokens
    tokens = expression_tokens.union(cut_tokens)

    # Scanning Rules
    ## ignore spaces tabs and newlines
    ignore = " \t\n"

    # Token definitions
    ## comma token definition as literal
    # ',' is emitted as a literal token, not as a named token type.
    literals = {r","}

    # Comparison operator token definitions
    ## token regular expression for '=='
    EQUALEQUAL = r"=="
    ## token regular expression for '>='
    GREATEREQUAL = r">="
    ## token regular expression for '<='
    LESSEQUAL = r"<="
    ## token regular expression for '>'
    GREATER = r">"
    ## token regular expression for '<'
    LESS = r"<"
    ## token regular expression for '!='
    NOTEQUAL = r"!="

    # Arithmetic operator token definitions
    ## token regular expression for power, both '**' and '^' allowed
    POWER = r"\*\*|\^"
    ## token regular expression for '*'
    TIMES = r"\*"
    ## token regular expression for '/'
    DIVIDE = r"/"
    ## token regular expression for '+'
    PLUS = r"\+"
    ## token regular expression for '-'
    MINUS = r"-"

    # Scanning Functions for tokens which
    # require additional operations
    # regular expressions are supplied via @_ decorator

    @_(r"\[")  # noqa: F821
    def LBRACK(self, t):
        """
        Scan opening bracket.

        Parameters:
            t (sly.lex.token): token of type LBRACK

        Raises:
            SyntaxError: if no following closing bracket is found
                in the input.

        Side Effect:
            Pushes 'BRACK' onto control_token_stack

        Returns:
            sly.lex.Token
        """
        # \cond false positive doxygen warning
        # Cheap eager check: fail fast if no ']' can ever match this '['.
        if "]" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '[' in cut.")
        self.control_token_stack.append("BRACK")
        return t
        # \endcond

    @_(r"\]")  # noqa: F821
    def RBRACK(self, t):
        """
        Scan closing bracket.

        Parameters:
            t (sly.lex.token): token of type RBRACK

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no bracket was opened previously.
                2. If state of control_token_stack is 'PAREN', which
                means a closing parenthesis is expected.

        Side Effect:
            Pops object from control_token_stack

        Returns:
            sly.lex.Token
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ']' in cut.")
        if state == "BRACK":
            return t
        elif state == "PAREN":
            raise SyntaxError("Illegal ']', expected ')'.")

    @_(r"\(")  # noqa: F821
    def LPAREN(self, t):
        """
        Scan opening parenthesis.

        Parameters:
            t (sly.lex.token): token of type LPAREN

        Raises:
            SyntaxError: if no following closing parenthesis is found
                in the input.

        Side Effect:
            Pushes 'PAREN' onto control_token_stack

        Returns:
            sly.lex.Token
        """
        # \cond false positive doxygen warning
        # Cheap eager check: fail fast if no ')' can ever match this '('.
        if ")" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '('")
        self.control_token_stack.append("PAREN")
        return t
        # \endcond

    @_(r"\)")  # noqa: F821
    def RPAREN(self, t):
        """
        Scan closing parenthesis.

        Parameters:
            t (sly.lex.token): token of type RPAREN

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no parenthesis was opened previously.
                2. If state of control_token_stack is 'BRACK', which
                means a closing bracket is expected.

        Side Effect:
            Pops state from control_token_stack

        Returns:
            sly.lex.Token
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ')' in cut.")
        if state == "BRACK":
            raise SyntaxError("Illegal ')', expected ']'.")
        elif state == "PAREN":
            return t

    @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))")  # noqa: E501, F821
    def DOUBLE(self, t):
        """
        Scanning function for double values

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is casted to float.

        Possible notations covered by this regular expression:
            Normal decimal notation e.g 0.1
            Hanging decimal separator notation e.g 1.
            Preceding decimal separator notation e.g .1
            Scientific notation with (signed) exponents e.g 1.0E4, 1.e-4, .1E+3
            Exponents are case insensitive e.g 1.e4, 1.E4
            Integer with exponent e.g 1E4

        Returns:
            sly.lex.Token
        """
        t.value = float(t.value)
        return t

    @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+")  # noqa: F821
    def INTEGER(self, t):
        """
        Scanning function for integer values
        Allows normal and hex notation (case insensitive)

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                The value attribute is of type str initially, equals
                the matched sequence and is casted to int.

        Warning:
            python int-objects are converted
            to the standard c++ int datatype (32bit).
            Overflows can happen because numerical limits
            of python int and c++ int datatypes differ.
            If you need to input large values write it as double.

        Returns:
            sly.lex.Token
        """
        try:
            t.value = int(t.value)
        except ValueError:
            # casting hex notation
            t.value = int(t.value, base=16)
        return t

    @_(r"[a-zA-Z_][a-zA-Z_0-9]*")  # noqa: F821
    def IDENTIFIER(self, t):
        """
        Scanning function for identifiers

        If a matched sequence equals reserved keywords of other tokens
        the token type and value is remapped via the reserved dictionary.

        Parameters:
            t (sly.lex.Token): initial token generated by the scanner library.
                value attribute equals the matched sequence.

        Returns:
            sly.lex.Token
        """
        reserved = {
            "and": "AND",
            "or": "OR",
            "not": "NOT",
            "True": "BOOLEAN",
            "true": "BOOLEAN",
            "False": "BOOLEAN",
            "false": "BOOLEAN",
            "nan": "DOUBLE",
            "infinity": "DOUBLE",
            "inf": "DOUBLE",
        }
        # Check for reserved words
        t.type = reserved.get(t.value, "IDENTIFIER")

        # Set value to bool if BOOLEAN type was returned from reserved dict.
        if t.type == "BOOLEAN":
            t.value = t.value == "True" or t.value == "true"
        # Take care of special infinity and nan values.
        if t.type == "DOUBLE":
            t.value = float(t.value)
        # \cond false positive doxygen warning
        if t.type == "IDENTIFIER":
            try:
                # An identifier directly followed by '(' is a metavariable
                # call: switch to B2ParameterLexer to capture its arguments
                # as a single ARGUMENTTUPLE token.
                if self.text[self.index] == "(":
                    # Check that closing parenthesis exists
                    if ")" not in self.text[self.index:]:
                        raise SyntaxError("Unmatched '('")
                    else:
                        self.push_state(B2ParameterLexer)
            except IndexError:
                # Identifier is the last token in the input; nothing follows.
                pass
        return t
        # \endcond
354
355
def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.

    Args:
        parser_type (str): choice of parser type, 'cut' or 'expression'

    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut",
        "expression",
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"

    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:
        <cut> ::= EMPTY
            | <boolean_expression>

        <boolean_expression> ::= <disjunction>

        <disjunction> ::= <conjunction>
            | <disjunction> OR <conjunction>

        <conjunction> ::= <negation>
            | <conjunction> AND <negation>

        <negation> ::= <bracket_expression>
            | NOT <negation>

        <bracket_expression> ::= <relational_expression>
            | LBRACK <boolean_expression> RBRACK

        <relational_expression> ::= <expression>
            | <expression> <comparison_operator> <expression>
            | <expression> <comparison_operator> <expression>
              <comparison_operator> <expression>

        <comparison_operator> ::= EQUALEQUAL
            | GREATER
            | LESS
            | GREATEREQUAL
            | LESSEQUAL
            | NOTEQUAL

        <expression> ::= <sum>

        <sum> ::= <term>
            | <sum> PLUS <term>
            | <sum> MINUS <term>

        <term> ::= <factor>
            | <term> TIMES <factor>
            | <term> DIVIDE <factor>

        <factor> ::= <power>
            | PLUS <factor>
            | MINUS <factor>

        <power> ::= <primary>
            | <primary> POWER <factor>

        <primary> ::= LPAREN <expression> RPAREN
            | <function>
            | IDENTIFIER
            | INTEGER
            | BOOLEAN
            | DOUBLE

        <function> ::= IDENTIFIER ARGUMENTTUPLE
        """

        def __init__(self, verbose=False):
            """
            Initialize Parser
            @param verbose run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            ## verbose setting, creates more human readable tuple output
            ## only for testing, debugging purposes
            ## not used in production, as default of kwarg is False
            self.verbose = verbose
            ## parameter state stack
            ## used for scope detection of variables and metavariables
            ## NOTE(review): currently never pushed to or popped from in this
            ## file; retained for backward compatibility with external users.
            self.parameter_stack = list()

        if parser_type == "cut":
            ## token list for B2Parser include cut specific tokens
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            ## token list for B2ExpressionParser exclude cut specific tokens
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)
        ## Start symbol of the grammar, either 'cut' or 'expression'.
        start = parser_type
        # Define precedence of operators starting with lowest precedence
        # first element of tuple indicates associativity of operator
        if parser_type == "cut":
            ## Precedence definition for B2Parser
            precedence = (  # noqa: F841
                ("left", "OR"),
                ("left", "AND"),
                ("nonassoc", "NOT"),
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        else:
            ## Reduced precedence definition for B2ExpressionParser
            precedence = (  # noqa: F841
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
                ("right", "POWER"),
            )
        ## Dict for encoding nodetypes to integers
        ## Must match enum in framework/utilities/AbstractNodes.h
        node_types = {
            "UnaryBooleanNode": 0,
            "BinaryBooleanNode": 1,
            "UnaryRelationalNode": 2,
            "BinaryRelationalNode": 3,
            "TernaryRelationalNode": 4,
            "UnaryExpressionNode": 5,
            "BinaryExpressionNode": 6,
            "FunctionNode": 7,
            "IdentifierNode": 8,
            "DoubleNode": 9,
            "IntegerNode": 10,
            "BooleanNode": 11,
        }

        ## Dict for encoding boolean operator types to integers
        ## Must match BooleanOperator enum in framework/utilities/AbstractNodes.h # noqa: E501
        b_operator_types = {
            "and": 0,
            "or": 1,
        }

        ## Dict for encoding comparison operator types to integers
        ## Must match ComparisonOperator enum in framework/utilities/AbstractNodes.h # noqa: E501
        c_operator_types = {
            "==": 0,
            ">=": 1,
            "<=": 2,
            ">": 3,
            "<": 4,
            "!=": 5,
        }

        ## Dict for encoding arithmetic operator types to integers
        ## Must match ArithmeticOperator enum in framework/utilities/AbstractNodes.h # noqa: E501
        a_operation_types = {
            "+": 0,
            "-": 1,
            "*": 2,
            "/": 3,
            "**": 4,
            "^": 4
        }

        def get_node_type(self, node_name: str):
            """
            Return the node type integer value
            or node name if verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value
            or comparison operator name if verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]  # noqa: E501

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value
            or boolean operator name if verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]  # noqa: E501

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operator type integer value
            or arithmetic operator token if verbose setting is chosen.
            """
            return operation_name if self.verbose else self.a_operation_types[operation_name]  # noqa: E501

        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal

                Grammar rules:
                    <cut> ::= EMPTY
                        | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    # Empty cut accepts everything: encode it as 'True'.
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (
                            self.get_node_type("BooleanNode"),
                            True
                        )
                    )

            @_(r"disjunction")  # noqa: F821
            def boolean_expression(self, p):
                """
                Parsing function for <boolean_expression> nonterminal

                Grammar rule:
                    <boolean_expression> ::= <disjunction>
                """
                return p.disjunction

            @_(r"disjunction OR conjunction", r"conjunction")  # noqa: F821
            def disjunction(self, p):
                """
                Parsing function for <disjunction> nonterminal

                Grammar rules:
                    <disjunction> ::= <conjunction>
                        | <disjunction> OR <conjunction>
                """
                try:
                    return (
                        self.get_node_type("BinaryBooleanNode"),
                        p.disjunction,
                        p.conjunction,
                        self.get_boper_type(p.OR),
                    )
                except AttributeError:
                    return p.conjunction

            @_(r"conjunction AND negation", r"negation")  # noqa: F821
            def conjunction(self, p):
                """
                Parsing function for <conjunction> nonterminal

                Grammar rules:
                    <conjunction> ::= <negation>
                        | <conjunction> AND <negation>
                """
                try:
                    return (
                        self.get_node_type("BinaryBooleanNode"),
                        p.conjunction,
                        p.negation,
                        self.get_boper_type(p.AND),
                    )
                except AttributeError:
                    return p.negation

            @_(r"bracket_expression", r"NOT negation")  # noqa: F821
            def negation(self, p):
                """
                Parsing function for <negation> nonterminal

                Grammar rules:
                    <negation> ::= <bracket_expression>
                        | NOT <negation>
                """
                try:
                    return p.bracket_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryBooleanNode"),
                        p.negation,
                        True,
                        False,
                    )

            @_(  # noqa: F821
                r"relational_expression",
                r"LBRACK boolean_expression RBRACK")
            def bracket_expression(self, p):
                """
                Parsing function for <bracket_expression> nonterminal

                Grammar rules:
                    <bracket_expression> ::= <relational_expression>
                        | LBRACK <boolean_expression> RBRACK
                """
                try:
                    return p.relational_expression
                except AttributeError:
                    return (
                        self.get_node_type("UnaryBooleanNode"),
                        p.boolean_expression,
                        False,
                        True,
                    )

            @_(r"expression")  # noqa: F821
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression>
                """

                return (self.get_node_type("UnaryRelationalNode"), p.expression)

            @_(r"expression comparison_operator expression")  # noqa: F821
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression> <comparison_operator>
                        <expression>
                """
                return (
                    self.get_node_type("BinaryRelationalNode"),
                    p.expression0,
                    p.expression1,
                    self.get_coper_type(p.comparison_operator),
                )

            @_(r"expression comparison_operator expression comparison_operator expression")  # noqa: F821, E501
            def relational_expression(self, p):  # noqa: F811
                """
                Parsing function for <relational_expression> nonterminal

                Grammar rule:
                    <relational_expression> ::= <expression> <comparison_operator>
                        <expression> <comparison_operator> <expression>
                """
                return (
                    self.get_node_type("TernaryRelationalNode"),
                    p.expression0,
                    p.expression1,
                    p.expression2,
                    self.get_coper_type(p.comparison_operator0),
                    self.get_coper_type(p.comparison_operator1),
                )

            @_(  # noqa: F821
                r"EQUALEQUAL",
                r"GREATER",
                r"LESS",
                r"GREATEREQUAL",
                r"LESSEQUAL",
                r"NOTEQUAL",
            )
            def comparison_operator(self, p):
                """
                Parsing function for <comparison_operator> nonterminal

                Grammar rules:
                    <comparison_operator> ::= EQUALEQUAL
                        | GREATER
                        | LESS
                        | GREATEREQUAL
                        | LESSEQUAL
                        | NOTEQUAL
                """
                return p[0]

        @_(r"sum")  # noqa: F821
        def expression(self, p):
            """
            Parsing function for <expression> nonterminal

            Grammar rule:
                <expression> ::= <sum>
            """
            return p.sum

        @_(r"sum PLUS term", r"sum MINUS term", r"term")  # noqa: F821
        def sum(self, p):
            """
            Parsing function for <sum> nonterminal

            Grammar rules:
                <sum> ::= <term>
                    | <sum> PLUS <term>
                    | <sum> MINUS <term>
            """
            try:
                return (
                    self.get_node_type("BinaryExpressionNode"),
                    p.sum,
                    p.term,
                    self.get_a_operation_type(p[1]),
                )
            except AttributeError:
                return p.term

        @_(r"term TIMES factor", r"term DIVIDE factor", r"factor")  # noqa: F821, E501
        def term(self, p):
            """
            Parsing function for <term> nonterminal

            Grammar rules:
                <term> ::= <factor>
                    | <term> TIMES <factor>
                    | <term> DIVIDE <factor>
            """
            try:
                return (
                    self.get_node_type("BinaryExpressionNode"),
                    p.term,
                    p.factor,
                    self.get_a_operation_type(p[1]),
                )
            except AttributeError:
                return p.factor

        @_(r"power")  # noqa: F821
        def factor(self, p):
            """
            Parsing function for <power> nonterminal

            Grammar rule:
                <factor> ::= <power>
            """
            return p.power

        @_(r"PLUS factor")  # noqa: F821
        def factor(self, p):  # noqa: F811
            """
            Parsing function for <factor> nonterminal

            Grammar rules:
                <factor> ::= PLUS <factor>
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor,
                False,
                False,
            )

        @_(r"MINUS factor")  # noqa: F821
        def factor(self, p):  # noqa: F811
            """
            Parsing function for <factor> nonterminal

            Grammar rule:
                <factor> ::= MINUS factor
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.factor,
                True,
                False,
            )

        @_(r"primary")  # noqa: F821
        def power(self, p):
            """
            Parsing function for <power> nonterminal

            Grammar rule:
                <power> ::= <primary>
            """
            return p.primary

        @_(r"primary POWER factor")  # noqa: F821
        def power(self, p):  # noqa: F811
            """
            Parsing function for <power> nonterminal

            Grammar rule:
                <power> ::= <primary> POWER <factor>
            """
            return (
                self.get_node_type("BinaryExpressionNode"),
                p.primary,
                p.factor,
                self.get_a_operation_type(p.POWER),
            )

        @_(r"function")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= <function>
            """
            return p.function

        @_(r"LPAREN expression RPAREN")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= LPAREN <expression> RPAREN
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.expression,
                False,
                True,
            )

        @_(r"INTEGER")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= INTEGER
            """
            return (self.get_node_type("IntegerNode"), p.INTEGER)

        @_(r"DOUBLE")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= DOUBLE
            """
            return (self.get_node_type("DoubleNode"), p.DOUBLE)

        @_(r"BOOLEAN")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= BOOLEAN
            """
            return (self.get_node_type("BooleanNode"), p.BOOLEAN)

        @_(r"IDENTIFIER")  # noqa: F821
        def primary(self, p):  # noqa: F811
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= IDENTIFIER
            """
            # Both branches of the former parameter_stack check produced the
            # same node, so the dead conditional was removed.
            return (
                self.get_node_type("IdentifierNode"),
                p.IDENTIFIER,
            )

        @_(r"IDENTIFIER ARGUMENTTUPLE")  # noqa: F821
        def function(self, p):
            """
            Parsing function for <function> nonterminal

            Grammar rule:
                <function> ::= IDENTIFIER ARGUMENTTUPLE
            """
            # Strip the enclosing parentheses from the captured argument
            # tuple. Both branches of the former parameter_stack check
            # produced the same node, so the dead conditional was removed.
            return (
                self.get_node_type("FunctionNode"),
                p.IDENTIFIER,
                p.ARGUMENTTUPLE[1:-1],
            )

        def error(self, p):
            """
            Error function, called immediately if syntax error is detected
            @param p (sly.token) offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # Get error position of offending token in cut.
                error_pos = p.index
            except AttributeError:  # syntax error at EOF, p is None
                # Set error position to length of cut minus one.
                error_pos = len(self.cut) - 1
            try:
                # Get error token type
                error_token = p.type
            except AttributeError:
                # syntax error at EOF get last token from stack
                error_token = self.symstack[-1].type

            # Format error message
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)

        def parse(self, cut: str, token_generator) -> tuple:
            """
            Overwrite sly.Parser parse function.
            @param cut unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            ## Set cut attribute needed in case of an error.
            self.cut = cut
            return super().parse(token_generator)

    return B2ParserMixin
974
975
## Parser class accepting the full cut grammar (boolean and expression rules).
B2Parser = parser_class_decorator(Parser, parser_type="cut")

## Parser class accepting only the arithmetic expression subset of the grammar.
B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")
979
980
def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    # Feed the token stream produced from the cut straight into the parser.
    token_stream = B2Lexer().tokenize(cut)
    return B2Parser(verbose).parse(cut, token_stream)
991
992
def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut
    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    # Feed the token stream produced from the cut straight into the parser.
    token_stream = B2Lexer().tokenize(cut)
    return B2ExpressionParser(verbose).parse(cut, token_stream)
1003
1004
if __name__ == "__main__":
    # Small interactive driver: -e/--expression switches from cut parsing
    # to expression-only parsing.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    options = cli.parse_args()
    if options.expression:
        cut = input("Please input expression:\n")
        print(parse_expression(cut))
    else:
        cut = input("Please input cut:\n")
        print(parse(cut))
# Doxygen cross-reference residue from extraction:
# index — Increment current scanning position. (Definition b2parser.py:66)