Belle II Software development
FormulaParser.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/utilities/FormulaParser.h>
10#include <stdexcept>
11#include <cmath>
12
13namespace Belle2 {
20 {
21 switch (op) {
22 case EOperator::c_noop: return ' ';
23 case EOperator::c_plus: return '+';
24 case EOperator::c_minus: return '-';
25 case EOperator::c_multiply: return '*';
26 case EOperator::c_divide: return '/';
27 case EOperator::c_power: return '^';
28 case EOperator::c_roundBracketOpen: return '(';
29 case EOperator::c_roundBracketClose: return ')';
30 case EOperator::c_squareBracketOpen: return '[';
31 case EOperator::c_squareBracketClose: return ']';
32 }
33 return 0;
34 }
35
36 double FormulaParserBase::applyOperator(EOperator op, double a, double b)
37 {
38 switch (op) {
39 case EOperator::c_plus: return a + b;
40 case EOperator::c_minus: return a - b;
41 case EOperator::c_multiply: return a * b;
42 case EOperator::c_divide: return a / b;
43 case EOperator::c_power: return std::pow(a, b);
44 default: throw std::runtime_error(std::string("Cannot apply operator ") + operatorToChar(op));
45 }
46 return 0;
47 }
48
50 {
51 switch (current) {
52 case ENumberStatus::c_Invalid:
53 // invalid stays invalid
54 return ENumberStatus::c_Invalid;
55 case ENumberStatus::c_Empty:
56 // numbers are allowed to start with digits, a dot or a sign
57 if (std::isdigit(next)) return ENumberStatus::c_Int;
58 if (next == '.') return ENumberStatus::c_LeadingDot;
59 if (next == '+' or next == '-') return ENumberStatus::c_Sign;
60 // everything else we don't like
61 return ENumberStatus::c_Invalid;
62 case ENumberStatus::c_Sign:
63 // if we started with a sign we can only go to digit and dots, no exponent
64 if (std::isdigit(next)) return ENumberStatus::c_Int;
65 if (next == '.') return ENumberStatus::c_Dot;
66 return ENumberStatus::c_Invalid;
67 case ENumberStatus::c_Int:
68 // So far it's a valid int consisting only of [sign +] digits, next
69 // stage is more digits, a . or an exponent
70 if (std::isdigit(next)) return ENumberStatus::c_Int;
71 if (next == '.') return ENumberStatus::c_Dot;
72 if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
73 return ENumberStatus::c_Invalid;
74 case ENumberStatus::c_Dot:
75 // After the dot there can be more digits ... or a exponent
76 if (std::isdigit(next)) return ENumberStatus::c_Float;
77 if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
78 return ENumberStatus::c_Invalid;
79 case ENumberStatus::c_LeadingDot:
80 // But if the dot was in the beginning then no exponent
81 if (std::isdigit(next)) return ENumberStatus::c_Float;
82 return ENumberStatus::c_Invalid;
83 // so, we saw some digits after the dot ... more digits or exponent it is
84 case ENumberStatus::c_Float:
85 if (std::isdigit(next)) return ENumberStatus::c_Float;
86 if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
87 return ENumberStatus::c_Invalid;
88 case ENumberStatus::c_Exponent:
89 // and for the exponent we need either additional digits or a sign
90 if (std::isdigit(next)) return ENumberStatus::c_Scientific;
91 if (next == '+' or next == '-') return ENumberStatus::c_ExponentSign;
92 return ENumberStatus::c_Invalid;
93 case ENumberStatus::c_ExponentSign:
94 case ENumberStatus::c_Scientific:
95 // and after the exponent sign and any digit thereafter only digits are possible
96 if (std::isdigit(next)) return ENumberStatus::c_Scientific;
97 return ENumberStatus::c_Invalid;
98 }
99 return ENumberStatus::c_Invalid;
100 }
101
103 {
104 // we only have binary operators so we need two operands
105 if (stacksize < 1)
106 throw std::runtime_error("could not parse, stack of operands empty. Please report, this is most likely a bug");
107 if (stacksize < 2)
108 throw std::runtime_error("Missing operand");
109 }
110
112 {
114 //the last thing we added was a variable so a bracket doesn't make sense
115 if (!m_lastTokenWasOperator) throw std::runtime_error("missing operator");
116 // otherwise, ont the stack it goes
117 m_operatorStack.push(op);
118 return;
119 }
121 // closing bracket. Look for a matching opening bracket and execute all
122 // operators until then
125 if (op == EOperator::c_squareBracketClose) std::swap(correct, wrong);
126 while (!m_operatorStack.empty()) {
127 EOperator tok = m_operatorStack.top();
128 m_operatorStack.pop();
129 if (tok == wrong) throw std::runtime_error("wrong type of closing bracket");
130 if (tok == correct) return;
131 executeOperator(tok);
132 }
133 // stack is empty, still no bracket
134 throw std::runtime_error("unmatched bracket");
135 }
136
137 // Ok, now normal operators: there shouldn't be two in a row
138 if (m_lastTokenWasOperator) throw std::runtime_error("missing operand before operator");
140
141 // The operator precedence is in the upper 4 bits ... hrhr
142 // TODO: make a function for this?
143 int op_precedence = (int)op >> 4;
144 while (!m_operatorStack.empty()) {
145 EOperator tok = m_operatorStack.top();
146 // Stop at brackets
148 int tok_precedence = (int)tok >> 4;
149 // Pow operator has right assiocativity, all others are left associative
150 // TODO: make nicer?
151 bool tok_right = op == EOperator::c_power;
152 // If the token has lower precedence or equal precedence but is right associative stop taking tokens
153 if (tok_precedence < op_precedence or (tok_precedence == op_precedence and tok_right)) break;
154 // otherwise pop and execute
155 executeOperator(tok);
156 m_operatorStack.pop();
157 }
158 m_operatorStack.push(op);
159 }
160
162 {
163 while (!m_operatorStack.empty()) {
164 EOperator op = m_operatorStack.top();
165 m_operatorStack.pop();
166 // found a bracket but no more closing brackets to come ... so error
168 throw std::runtime_error("missing closing bracket");
169 executeOperator(op);
170 }
171 }
172
174 {
175 if (!m_currentVariableName.empty()) {
176 if (!m_lastTokenWasOperator) throw std::runtime_error("Missing operator before variable");
178 // looks like a number, so add a number
180 char* ptr;
181 double value;
182 value = std::strtod(m_currentVariableName.c_str(), &ptr);
183 addVariable(InputToken(value));
184 } else {
186 }
187 }
188 m_currentVariableName.clear();
190 }
191
193 {
194 if (next == '+' or next == '-') {
195 // plus and minus are also part of literals so only treat it as operator
196 // if, together with the next character, this is not a valid float literal
197 auto isvalid = checkNumber(m_currentVariableNameNumberStatus, next);
198 if (isvalid != ENumberStatus::c_Invalid and checkNumber(isvalid, m_buffer.peek()) != ENumberStatus::c_Invalid) {
199 // this looks like a number don't interpret as operator
200 return EOperator::c_noop;
201 }
202 if (next == '+') return EOperator::c_plus;
203 if (next == '-') return EOperator::c_minus;
204 }
205 if (next == '/') return EOperator::c_divide;
206 if (next == '^') return EOperator::c_power;
207 if (next == '*') {
208 // is it python style '**'? if yes, remove one char from stream and
209 // assume pow
210 if (m_buffer.peek() == '*') {
211 m_buffer.get();
212 return EOperator::c_power;
213 }
214 // otherwise multiply
215 return EOperator::c_multiply;
216 }
217 if (next == '(') return EOperator::c_roundBracketOpen;
218 if (next == ')') return EOperator::c_roundBracketClose;
219 if (next == '[') return EOperator::c_squareBracketOpen;
220 if (next == ']') return EOperator::c_squareBracketClose;
221 // no operator, so let's return just that
222 return EOperator::c_noop;
223 }
224
225 void FormulaParserBase::processString(const std::string& formula)
226 {
227 // initialize buffer
228 m_buffer = std::istringstream(formula);
229 // clear stacks
230 std::stack<EOperator>().swap(m_operatorStack);
231 // and an empty identifier name
232 m_currentVariableName.clear();
234 // reset some other variable state
236 // and if the variable has arguments remember the nesting level of the ()
237 int nestlevel{0};
238 // Now loop over the whole formula character by character
239 for (char next; m_buffer.get(next);) {
240 // If nestlevel>0 we are in a variable(...) parameters area so ignore
241 // everything but keep track of how many open/closing brackets we saw
242 // until we are back to nestlevel=0
243 if (nestlevel > 0) {
244 m_currentVariableName += next;
245 if (next == '(') ++nestlevel;
246 if (next == ')') --nestlevel;
247 // finished variable arguments so variable is definitely done
248 if (nestlevel == 0) flushCurrentVariable();
249 // done with this character
250 continue;
251 }
252
253 // check for opening parenthesis: could be variable arguments or operation binding
254 if (next == '(' and not m_currentVariableName.empty()) {
255 m_currentVariableName += next;
256 ++nestlevel;
257 // definitely not a number anymore
259 // done with this character
260 continue;
261 }
262
263 // check for operator
264 auto opcode = checkForOperator(next);
265 if (opcode != EOperator::c_noop) {
266 // found operator, flush variable, add operator
268 addOperator(opcode);
269 // done with this character
270 continue;
271 }
272
273 // check for whitespace
274 if (next == ' ' or next == '\n' or next == '\t' or next == '\r') {
275 // variable is finished, just flush here.
277 // otherwise nothing to do with whitespace ...
278 continue;
279 }
280
281 // anything else is a identifier, most likely a variable name or a
282 // float literal now lets build up the variable name, first lets check
283 // if the variable name will still be a valid number
285 // then just add it to the state
286 m_currentVariableName += next;
287 }
288 if (nestlevel > 0) throw std::runtime_error("unterminated variable arguments");
289 // done with parsing, lets make sure everything is flushed
292 }
293
294 void FormulaParserBase::raiseError(const std::runtime_error& e)
295 {
296 // So lets some fun printing the error message :D
297 std::ostringstream message;
298 // check where we stopped parsing
299 auto pos = m_buffer.tellg();
300 // -1 -> after the end
301 if (pos == -1) pos = m_buffer.str().size() + 1;
302 // basic boring message + reason
303 message << "Error parsing formula at character " << pos << ": " << e.what() << std::endl;
304 // now lets go through the formula again, line by line. YES, multi line formula are a thing
305 std::istringstream errbuff(m_buffer.str());
306 long lastpos = 0;
307 bool arrowShown{false};
308 for (std::string line; std::getline(errbuff, line);) {
309 // print each line
310 message << " " << line << std::endl;
311 // and get the position after each line
312 auto curpos = errbuff.tellg();
313 // if it's the last line or if we are now beyond the error position then print an arrow
314 if (!arrowShown && (curpos == -1 || curpos >= pos)) { // -1 = last line
315 // from the beginning to the line to the position of the error
316 for (long i = lastpos - 1; i < pos; ++i) message << "-";
317 message << "^" << std::endl;
318 // but only show it once
319 arrowShown = true;
320 }
321 lastpos = curpos;
322 }
323 throw std::runtime_error(message.str());
324 }
326}
std::string m_currentVariableName
collect characters into a variable name
virtual void executeOperator(EOperator op)=0
Execute an operator on the current state.
virtual void addVariable(const InputToken &token)=0
Add a variable token to the current state.
std::stack< EOperator > m_operatorStack
Stack of operators for the Shunting-yard algorithm.
EOperator
List of known operators.
Definition: FormulaParser.h:31
@ c_roundBracketClose
Close round bracket.
@ c_roundBracketOpen
Open round bracket.
@ c_squareBracketOpen
Open square bracket.
@ c_squareBracketClose
Close square bracket.
std::variant< std::string, double > InputToken
Input token type: an input tokein is either a string or a float variable.
Definition: FormulaParser.h:59
std::istringstream m_buffer
Buffer for the formula.
ENumberStatus
States of a string literal when checking if it is a valid number.
Definition: FormulaParser.h:45
ENumberStatus m_currentVariableNameNumberStatus
State of the current variable name being a valid float literal.
bool m_lastTokenWasOperator
Bool to check whether there were consecutive operators or variables.
EOperator checkForOperator(char next)
Check if the next character is a operator.
void flushPendingOperators()
Flush all pending operators at the end of processing.
void addOperator(EOperator op)
Add an operator to the internal state, convert them to reverse polish notation using the shunting yar...
void processString(const std::string &formula)
Process the given formula and store the final state.
static double applyOperator(EOperator op, double a, double b)
Apply operator on two values.
static void assertOperatorUsable(size_t stacksize)
Make sure we have enough operands to use an operator.
void raiseError(const std::runtime_error &e)
Format the given runtime_error with context information and rethrow a new one.
void flushCurrentVariable()
Flush the currently parsed variable name and add it to the state either as variable or number.
static char operatorToChar(EOperator op) noexcept
Convert operator code to character.
static ENumberStatus checkNumber(ENumberStatus current, char next)
Check if a string literal with a given number status continues to be a valid number if next is append...
Abstract base class for different kinds of events.