Belle II Software  release-06-02-00
FormulaParser.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/utilities/FormulaParser.h>
10 #include <stdexcept>
11 #include <cmath>
12 
13 namespace Belle2 {
20  {
21  switch (op) {
22  case EOperator::c_noop: return ' ';
23  case EOperator::c_plus: return '+';
24  case EOperator::c_minus: return '-';
25  case EOperator::c_multiply: return '*';
26  case EOperator::c_divide: return '/';
27  case EOperator::c_power: return '^';
28  case EOperator::c_roundBracketOpen: return '(';
29  case EOperator::c_roundBracketClose: return ')';
30  case EOperator::c_squareBracketOpen: return '[';
31  case EOperator::c_squareBracketClose: return ']';
32  }
33  return 0;
34  }
35 
36  double FormulaParserBase::applyOperator(EOperator op, double a, double b)
37  {
38  switch (op) {
39  case EOperator::c_plus: return a + b;
40  case EOperator::c_minus: return a - b;
41  case EOperator::c_multiply: return a * b;
42  case EOperator::c_divide: return a / b;
43  case EOperator::c_power: return std::pow(a, b);
44  default: throw std::runtime_error(std::string("Cannot apply operator ") + operatorToChar(op));
45  }
46  return 0;
47  }
48 
50  switch (current)
51  {
52  case ENumberStatus::c_Invalid:
53  // invalid stays invalid
54  return ENumberStatus::c_Invalid;
55  case ENumberStatus::c_Empty:
56  // numbers are allowed to start with digits, a dot or a sign
57  if (std::isdigit(next)) return ENumberStatus::c_Int;
58  if (next == '.') return ENumberStatus::c_LeadingDot;
59  if (next == '+' or next == '-') return ENumberStatus::c_Sign;
60  // everything else we don't like
61  return ENumberStatus::c_Invalid;
62  case ENumberStatus::c_Sign:
63  // if we started with a sign we can only go to digit and dots, no exponent
64  if (std::isdigit(next)) return ENumberStatus::c_Int;
65  if (next == '.') return ENumberStatus::c_Dot;
66  return ENumberStatus::c_Invalid;
67  case ENumberStatus::c_Int:
68  // So far it's a valid int consisting only of [sign +] digits, next
69  // stage is more digits, a . or an exponent
70  if (std::isdigit(next)) return ENumberStatus::c_Int;
71  if (next == '.') return ENumberStatus::c_Dot;
72  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
73  return ENumberStatus::c_Invalid;
74  case ENumberStatus::c_Dot:
75  // After the dot there can be more digits ... or a exponent
76  if (std::isdigit(next)) return ENumberStatus::c_Float;
77  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
78  return ENumberStatus::c_Invalid;
79  case ENumberStatus::c_LeadingDot:
80  // But if the dot was in the beginning then no exponent
81  if (std::isdigit(next)) return ENumberStatus::c_Float;
82  return ENumberStatus::c_Invalid;
83  // so, we saw some digits after the dot ... more digits or exponent it is
84  case ENumberStatus::c_Float:
85  if (std::isdigit(next)) return ENumberStatus::c_Float;
86  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
87  return ENumberStatus::c_Invalid;
88  case ENumberStatus::c_Exponent:
89  // and for the exponent we need either additional digits or a sign
90  if (std::isdigit(next)) return ENumberStatus::c_Scientific;
91  if (next == '+' or next == '-') return ENumberStatus::c_ExponentSign;
92  return ENumberStatus::c_Invalid;
93  case ENumberStatus::c_ExponentSign:
94  case ENumberStatus::c_Scientific:
95  // and after the exponent sign and any digit thereafter only digits are possible
96  if (std::isdigit(next)) return ENumberStatus::c_Scientific;
97  return ENumberStatus::c_Invalid;
98  }
99  return ENumberStatus::c_Invalid;
100  }
101 
103  {
104  // we only have binary operators so we need two operands
105  if (stacksize < 1)
106  throw std::runtime_error("could not parse, stack of operands empty. Please report, this is most likely a bug");
107  if (stacksize < 2)
108  throw std::runtime_error("Missing operand");
109  }
110 
112  {
114  //the last thing we added was a variable so a bracket doesn't make sense
115  if (!m_lastTokenWasOperator) throw std::runtime_error("missing operator");
116  // otherwise, ont the stack it goes
117  m_operatorStack.push(op);
118  return;
119  }
121  // closing bracket. Look for a matching opening bracket and execute all
122  // operators until then
125  if (op == EOperator::c_squareBracketClose) std::swap(correct, wrong);
126  while (!m_operatorStack.empty()) {
127  EOperator tok = m_operatorStack.top();
128  m_operatorStack.pop();
129  if (tok == wrong) throw std::runtime_error("wrong type of closing bracket");
130  if (tok == correct) return;
131  executeOperator(tok);
132  }
133  // stack is empty, still no bracket
134  throw std::runtime_error("unmatched bracket");
135  }
136 
137  // Ok, now normal operators: there shouldn't be two in a row
138  if (m_lastTokenWasOperator) throw std::runtime_error("missing operand before operator");
139  m_lastTokenWasOperator = true;
140 
141  // The operator precedence is in the upper 4 bits ... hrhr
142  // TODO: make a function for this?
143  int op_precedence = (int)op >> 4;
144  while (!m_operatorStack.empty()) {
145  EOperator tok = m_operatorStack.top();
146  // Stop at brackets
148  int tok_precedence = (int)tok >> 4;
149  // Pow operator has right assiocativity, all others are left associative
150  // TODO: make nicer?
151  bool tok_right = op == EOperator::c_power;
152  // If the token has lower precedence or equal precedence but is right associative stop taking tokens
153  if (tok_precedence < op_precedence or (tok_precedence == op_precedence and tok_right)) break;
154  // otherwise pop and execute
155  executeOperator(tok);
156  m_operatorStack.pop();
157  }
158  m_operatorStack.push(op);
159  }
160 
162  {
163  while (!m_operatorStack.empty()) {
164  EOperator op = m_operatorStack.top();
165  m_operatorStack.pop();
166  // found a bracket but no more closing brackets to come ... so error
168  throw std::runtime_error("missing closing bracket");
169  executeOperator(op);
170  }
171  }
172 
174  {
175  if (!m_currentVariableName.empty()) {
176  if (!m_lastTokenWasOperator) throw std::runtime_error("Missing operator before variable");
177  m_lastTokenWasOperator = false;
178  // looks like a number, so add a number
180  char* ptr;
181  double value;
182  value = std::strtod(m_currentVariableName.c_str(), &ptr);
183  addVariable(InputToken(value));
184  } else {
186  }
187  }
188  m_currentVariableName.clear();
190  }
191 
193  if (next == '+' or next == '-')
194  {
195  // plus and minus are also part of literals so only treat it as operator
196  // if, together with the next character, this is not a valid float literal
197  auto isvalid = checkNumber(m_currentVariableNameNumberStatus, next);
198  if (isvalid != ENumberStatus::c_Invalid and checkNumber(isvalid, m_buffer.peek()) != ENumberStatus::c_Invalid) {
199  // this looks like a number don't interpret as operator
200  return EOperator::c_noop;
201  }
202  if (next == '+') return EOperator::c_plus;
203  if (next == '-') return EOperator::c_minus;
204  }
205  if (next == '/') return EOperator::c_divide;
206  if (next == '^') return EOperator::c_power;
207  if (next == '*')
208  {
209  // is it python style '**'? if yes, remove one char from stream and
210  // assume pow
211  if (m_buffer.peek() == '*') {
212  m_buffer.get();
213  return EOperator::c_power;
214  }
215  // otherwise multiply
216  return EOperator::c_multiply;
217  }
218  if (next == '(') return EOperator::c_roundBracketOpen;
219  if (next == ')') return EOperator::c_roundBracketClose;
220  if (next == '[') return EOperator::c_squareBracketOpen;
221  if (next == ']') return EOperator::c_squareBracketClose;
222  // no operator, so let's return just that
223  return EOperator::c_noop;
224  }
225 
226  void FormulaParserBase::processString(const std::string& formula)
227  {
228  // initialize buffer
229  m_buffer = std::istringstream(formula);
230  // clear stacks
231  std::stack<EOperator>().swap(m_operatorStack);
232  // and an empty identifier name
233  m_currentVariableName.clear();
234  m_lastTokenWasOperator = true;
235  // reset some other variable state
237  // and if the variable has arguments remember the nesting level of the ()
238  int nestlevel{0};
239  // Now loop over the whole formula character by character
240  for (char next; m_buffer.get(next);) {
241  // If nestlevel>0 we are in a variable(...) parameters area so ignore
242  // everything but keep track of how many open/closing brackets we saw
243  // until we are back to nestlevel=0
244  if (nestlevel > 0) {
245  m_currentVariableName += next;
246  if (next == '(') ++nestlevel;
247  if (next == ')') --nestlevel;
248  // finished variable arguments so variable is definitely done
249  if (nestlevel == 0) flushCurrentVariable();
250  // done with this character
251  continue;
252  }
253 
254  // check for opening parenthesis: could be variable arguments or operation binding
255  if (next == '(' and not m_currentVariableName.empty()) {
256  m_currentVariableName += next;
257  ++nestlevel;
258  // definitely not a number anymore
260  // done with this character
261  continue;
262  }
263 
264  // check for operator
265  auto opcode = checkForOperator(next);
266  if (opcode != EOperator::c_noop) {
267  // found operator, flush variable, add operator
269  addOperator(opcode);
270  // done with this character
271  continue;
272  }
273 
274  // check for whitespace
275  if (next == ' ' or next == '\n' or next == '\t' or next == '\r') {
276  // variable is finished, just flush here.
278  // otherwise nothing to do with whitespace ...
279  continue;
280  }
281 
282  // anything else is a identifier, most likely a variable name or a
283  // float literal now lets build up the variable name, first lets check
284  // if the variable name will still be a valid number
286  // then just add it to the state
287  m_currentVariableName += next;
288  }
289  if (nestlevel > 0) throw std::runtime_error("unterminated variable arguments");
290  // done with parsing, lets make sure everything is flushed
293  }
294 
295  void FormulaParserBase::raiseError(const std::runtime_error& e)
296  {
297  // So lets some fun printing the error message :D
298  std::ostringstream message;
299  // check where we stopped parsing
300  auto pos = m_buffer.tellg();
301  // -1 -> after the end
302  if (pos == -1) pos = m_buffer.str().size() + 1;
303  // basic boring message + reason
304  message << "Error parsing formula at character " << pos << ": " << e.what() << std::endl;
305  // now lets go through the formula again, line by line. YES, multi line formula are a thing
306  std::istringstream errbuff(m_buffer.str());
307  long lastpos = 0;
308  bool arrowShown{false};
309  for (std::string line; std::getline(errbuff, line);) {
310  // print each line
311  message << " " << line << std::endl;
312  // and get the position after each line
313  auto curpos = errbuff.tellg();
314  // if it's the last line or if we are now beyond the error position then print an arrow
315  if (!arrowShown && (curpos == -1 || curpos >= pos)) { // -1 = last line
316  // from the beginning to the line to the position of the error
317  for (long i = lastpos - 1; i < pos; ++i) message << "-";
318  message << "^" << std::endl;
319  // but only show it once
320  arrowShown = true;
321  }
322  lastpos = curpos;
323  }
324  throw std::runtime_error(message.str());
325  }
327 }
std::string m_currentVariableName
collect characters into a variable name
virtual void executeOperator(EOperator op)=0
Execute an operator on the current state.
virtual void addVariable(const InputToken &token)=0
Add a variable token to the current state.
std::stack< EOperator > m_operatorStack
Stack of operators for the Shunting-yard algorithm.
EOperator
List of known operators.
Definition: FormulaParser.h:31
@ c_roundBracketClose
Close round bracket.
@ c_roundBracketOpen
Open round bracket.
@ c_squareBracketOpen
Open square bracket.
@ c_squareBracketClose
Close square bracket.
std::variant< std::string, double > InputToken
Input token type: an input tokein is either a string or a float variable.
Definition: FormulaParser.h:59
std::istringstream m_buffer
Buffer for the formula.
ENumberStatus
States of a string literal when checking if it is a valid number.
Definition: FormulaParser.h:45
ENumberStatus m_currentVariableNameNumberStatus
State of the current variable name being a valid float literal.
bool m_lastTokenWasOperator
Bool to check whether there were consecutive operators or variables.
EOperator checkForOperator(char next)
Check if the next character is a operator.
void flushPendingOperators()
Flush all pending operators at the end of processing.
void addOperator(EOperator op)
Add an operator to the internal state, convert them to reverse polish notation using the shunting yar...
void processString(const std::string &formula)
Process the given formula and store the final state.
static double applyOperator(EOperator op, double a, double b)
Apply operator on two values.
static void assertOperatorUsable(size_t stacksize)
Make sure we have enough operands to use an operator.
void raiseError(const std::runtime_error &e)
Format the given runtime_error with context information and rethrow a new one.
void flushCurrentVariable()
Flush the currently parsed variable name and add it to the state either as variable or number.
static char operatorToChar(EOperator op) noexcept
Convert operator code to character.
static ENumberStatus checkNumber(ENumberStatus current, char next)
Check if a string literal with a given number status continues to be a valid number if next is append...
Abstract base class for different kinds of events.