Belle II Software  release-08-01-10
FormulaParser.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/utilities/FormulaParser.h>
10 #include <stdexcept>
11 #include <cmath>
12 
13 namespace Belle2 {
20  {
21  switch (op) {
22  case EOperator::c_noop: return ' ';
23  case EOperator::c_plus: return '+';
24  case EOperator::c_minus: return '-';
25  case EOperator::c_multiply: return '*';
26  case EOperator::c_divide: return '/';
27  case EOperator::c_power: return '^';
28  case EOperator::c_roundBracketOpen: return '(';
29  case EOperator::c_roundBracketClose: return ')';
30  case EOperator::c_squareBracketOpen: return '[';
31  case EOperator::c_squareBracketClose: return ']';
32  }
33  return 0;
34  }
35 
36  double FormulaParserBase::applyOperator(EOperator op, double a, double b)
37  {
38  switch (op) {
39  case EOperator::c_plus: return a + b;
40  case EOperator::c_minus: return a - b;
41  case EOperator::c_multiply: return a * b;
42  case EOperator::c_divide: return a / b;
43  case EOperator::c_power: return std::pow(a, b);
44  default: throw std::runtime_error(std::string("Cannot apply operator ") + operatorToChar(op));
45  }
46  return 0;
47  }
48 
50  {
51  switch (current) {
52  case ENumberStatus::c_Invalid:
53  // invalid stays invalid
54  return ENumberStatus::c_Invalid;
55  case ENumberStatus::c_Empty:
56  // numbers are allowed to start with digits, a dot or a sign
57  if (std::isdigit(next)) return ENumberStatus::c_Int;
58  if (next == '.') return ENumberStatus::c_LeadingDot;
59  if (next == '+' or next == '-') return ENumberStatus::c_Sign;
60  // everything else we don't like
61  return ENumberStatus::c_Invalid;
62  case ENumberStatus::c_Sign:
63  // if we started with a sign we can only go to digit and dots, no exponent
64  if (std::isdigit(next)) return ENumberStatus::c_Int;
65  if (next == '.') return ENumberStatus::c_Dot;
66  return ENumberStatus::c_Invalid;
67  case ENumberStatus::c_Int:
68  // So far it's a valid int consisting only of [sign +] digits, next
69  // stage is more digits, a . or an exponent
70  if (std::isdigit(next)) return ENumberStatus::c_Int;
71  if (next == '.') return ENumberStatus::c_Dot;
72  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
73  return ENumberStatus::c_Invalid;
74  case ENumberStatus::c_Dot:
75  // After the dot there can be more digits ... or a exponent
76  if (std::isdigit(next)) return ENumberStatus::c_Float;
77  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
78  return ENumberStatus::c_Invalid;
79  case ENumberStatus::c_LeadingDot:
80  // But if the dot was in the beginning then no exponent
81  if (std::isdigit(next)) return ENumberStatus::c_Float;
82  return ENumberStatus::c_Invalid;
83  // so, we saw some digits after the dot ... more digits or exponent it is
84  case ENumberStatus::c_Float:
85  if (std::isdigit(next)) return ENumberStatus::c_Float;
86  if (next == 'E' or next == 'e') return ENumberStatus::c_Exponent;
87  return ENumberStatus::c_Invalid;
88  case ENumberStatus::c_Exponent:
89  // and for the exponent we need either additional digits or a sign
90  if (std::isdigit(next)) return ENumberStatus::c_Scientific;
91  if (next == '+' or next == '-') return ENumberStatus::c_ExponentSign;
92  return ENumberStatus::c_Invalid;
93  case ENumberStatus::c_ExponentSign:
94  case ENumberStatus::c_Scientific:
95  // and after the exponent sign and any digit thereafter only digits are possible
96  if (std::isdigit(next)) return ENumberStatus::c_Scientific;
97  return ENumberStatus::c_Invalid;
98  }
99  return ENumberStatus::c_Invalid;
100  }
101 
103  {
104  // we only have binary operators so we need two operands
105  if (stacksize < 1)
106  throw std::runtime_error("could not parse, stack of operands empty. Please report, this is most likely a bug");
107  if (stacksize < 2)
108  throw std::runtime_error("Missing operand");
109  }
110 
112  {
114  //the last thing we added was a variable so a bracket doesn't make sense
115  if (!m_lastTokenWasOperator) throw std::runtime_error("missing operator");
116  // otherwise, ont the stack it goes
117  m_operatorStack.push(op);
118  return;
119  }
121  // closing bracket. Look for a matching opening bracket and execute all
122  // operators until then
125  if (op == EOperator::c_squareBracketClose) std::swap(correct, wrong);
126  while (!m_operatorStack.empty()) {
127  EOperator tok = m_operatorStack.top();
128  m_operatorStack.pop();
129  if (tok == wrong) throw std::runtime_error("wrong type of closing bracket");
130  if (tok == correct) return;
131  executeOperator(tok);
132  }
133  // stack is empty, still no bracket
134  throw std::runtime_error("unmatched bracket");
135  }
136 
137  // Ok, now normal operators: there shouldn't be two in a row
138  if (m_lastTokenWasOperator) throw std::runtime_error("missing operand before operator");
139  m_lastTokenWasOperator = true;
140 
141  // The operator precedence is in the upper 4 bits ... hrhr
142  // TODO: make a function for this?
143  int op_precedence = (int)op >> 4;
144  while (!m_operatorStack.empty()) {
145  EOperator tok = m_operatorStack.top();
146  // Stop at brackets
148  int tok_precedence = (int)tok >> 4;
149  // Pow operator has right assiocativity, all others are left associative
150  // TODO: make nicer?
151  bool tok_right = op == EOperator::c_power;
152  // If the token has lower precedence or equal precedence but is right associative stop taking tokens
153  if (tok_precedence < op_precedence or (tok_precedence == op_precedence and tok_right)) break;
154  // otherwise pop and execute
155  executeOperator(tok);
156  m_operatorStack.pop();
157  }
158  m_operatorStack.push(op);
159  }
160 
162  {
163  while (!m_operatorStack.empty()) {
164  EOperator op = m_operatorStack.top();
165  m_operatorStack.pop();
166  // found a bracket but no more closing brackets to come ... so error
168  throw std::runtime_error("missing closing bracket");
169  executeOperator(op);
170  }
171  }
172 
174  {
175  if (!m_currentVariableName.empty()) {
176  if (!m_lastTokenWasOperator) throw std::runtime_error("Missing operator before variable");
177  m_lastTokenWasOperator = false;
178  // looks like a number, so add a number
180  char* ptr;
181  double value;
182  value = std::strtod(m_currentVariableName.c_str(), &ptr);
183  addVariable(InputToken(value));
184  } else {
186  }
187  }
188  m_currentVariableName.clear();
190  }
191 
193  {
194  if (next == '+' or next == '-') {
195  // plus and minus are also part of literals so only treat it as operator
196  // if, together with the next character, this is not a valid float literal
197  auto isvalid = checkNumber(m_currentVariableNameNumberStatus, next);
198  if (isvalid != ENumberStatus::c_Invalid and checkNumber(isvalid, m_buffer.peek()) != ENumberStatus::c_Invalid) {
199  // this looks like a number don't interpret as operator
200  return EOperator::c_noop;
201  }
202  if (next == '+') return EOperator::c_plus;
203  if (next == '-') return EOperator::c_minus;
204  }
205  if (next == '/') return EOperator::c_divide;
206  if (next == '^') return EOperator::c_power;
207  if (next == '*') {
208  // is it python style '**'? if yes, remove one char from stream and
209  // assume pow
210  if (m_buffer.peek() == '*') {
211  m_buffer.get();
212  return EOperator::c_power;
213  }
214  // otherwise multiply
215  return EOperator::c_multiply;
216  }
217  if (next == '(') return EOperator::c_roundBracketOpen;
218  if (next == ')') return EOperator::c_roundBracketClose;
219  if (next == '[') return EOperator::c_squareBracketOpen;
220  if (next == ']') return EOperator::c_squareBracketClose;
221  // no operator, so let's return just that
222  return EOperator::c_noop;
223  }
224 
225  void FormulaParserBase::processString(const std::string& formula)
226  {
227  // initialize buffer
228  m_buffer = std::istringstream(formula);
229  // clear stacks
230  std::stack<EOperator>().swap(m_operatorStack);
231  // and an empty identifier name
232  m_currentVariableName.clear();
233  m_lastTokenWasOperator = true;
234  // reset some other variable state
236  // and if the variable has arguments remember the nesting level of the ()
237  int nestlevel{0};
238  // Now loop over the whole formula character by character
239  for (char next; m_buffer.get(next);) {
240  // If nestlevel>0 we are in a variable(...) parameters area so ignore
241  // everything but keep track of how many open/closing brackets we saw
242  // until we are back to nestlevel=0
243  if (nestlevel > 0) {
244  m_currentVariableName += next;
245  if (next == '(') ++nestlevel;
246  if (next == ')') --nestlevel;
247  // finished variable arguments so variable is definitely done
248  if (nestlevel == 0) flushCurrentVariable();
249  // done with this character
250  continue;
251  }
252 
253  // check for opening parenthesis: could be variable arguments or operation binding
254  if (next == '(' and not m_currentVariableName.empty()) {
255  m_currentVariableName += next;
256  ++nestlevel;
257  // definitely not a number anymore
259  // done with this character
260  continue;
261  }
262 
263  // check for operator
264  auto opcode = checkForOperator(next);
265  if (opcode != EOperator::c_noop) {
266  // found operator, flush variable, add operator
268  addOperator(opcode);
269  // done with this character
270  continue;
271  }
272 
273  // check for whitespace
274  if (next == ' ' or next == '\n' or next == '\t' or next == '\r') {
275  // variable is finished, just flush here.
277  // otherwise nothing to do with whitespace ...
278  continue;
279  }
280 
281  // anything else is a identifier, most likely a variable name or a
282  // float literal now lets build up the variable name, first lets check
283  // if the variable name will still be a valid number
285  // then just add it to the state
286  m_currentVariableName += next;
287  }
288  if (nestlevel > 0) throw std::runtime_error("unterminated variable arguments");
289  // done with parsing, lets make sure everything is flushed
292  }
293 
294  void FormulaParserBase::raiseError(const std::runtime_error& e)
295  {
296  // So lets some fun printing the error message :D
297  std::ostringstream message;
298  // check where we stopped parsing
299  auto pos = m_buffer.tellg();
300  // -1 -> after the end
301  if (pos == -1) pos = m_buffer.str().size() + 1;
302  // basic boring message + reason
303  message << "Error parsing formula at character " << pos << ": " << e.what() << std::endl;
304  // now lets go through the formula again, line by line. YES, multi line formula are a thing
305  std::istringstream errbuff(m_buffer.str());
306  long lastpos = 0;
307  bool arrowShown{false};
308  for (std::string line; std::getline(errbuff, line);) {
309  // print each line
310  message << " " << line << std::endl;
311  // and get the position after each line
312  auto curpos = errbuff.tellg();
313  // if it's the last line or if we are now beyond the error position then print an arrow
314  if (!arrowShown && (curpos == -1 || curpos >= pos)) { // -1 = last line
315  // from the beginning to the line to the position of the error
316  for (long i = lastpos - 1; i < pos; ++i) message << "-";
317  message << "^" << std::endl;
318  // but only show it once
319  arrowShown = true;
320  }
321  lastpos = curpos;
322  }
323  throw std::runtime_error(message.str());
324  }
326 }
std::string m_currentVariableName
collect characters into a variable name
virtual void executeOperator(EOperator op)=0
Execute an operator on the current state.
virtual void addVariable(const InputToken &token)=0
Add a variable token to the current state.
std::stack< EOperator > m_operatorStack
Stack of operators for the Shunting-yard algorithm.
EOperator
List of known operators.
Definition: FormulaParser.h:31
@ c_roundBracketClose
Close round bracket.
@ c_roundBracketOpen
Open round bracket.
@ c_squareBracketOpen
Open square bracket.
@ c_squareBracketClose
Close square bracket.
std::variant< std::string, double > InputToken
Input token type: an input tokein is either a string or a float variable.
Definition: FormulaParser.h:59
std::istringstream m_buffer
Buffer for the formula.
ENumberStatus
States of a string literal when checking if it is a valid number.
Definition: FormulaParser.h:45
ENumberStatus m_currentVariableNameNumberStatus
State of the current variable name being a valid float literal.
bool m_lastTokenWasOperator
Bool to check whether there were consecutive operators or variables.
EOperator checkForOperator(char next)
Check if the next character is a operator.
void flushPendingOperators()
Flush all pending operators at the end of processing.
void addOperator(EOperator op)
Add an operator to the internal state, convert them to reverse polish notation using the shunting yar...
void processString(const std::string &formula)
Process the given formula and store the final state.
static double applyOperator(EOperator op, double a, double b)
Apply operator on two values.
static void assertOperatorUsable(size_t stacksize)
Make sure we have enough operands to use an operator.
void raiseError(const std::runtime_error &e)
Format the given runtime_error with context information and rethrow a new one.
void flushCurrentVariable()
Flush the currently parsed variable name and add it to the state either as variable or number.
static char operatorToChar(EOperator op) noexcept
Convert operator code to character.
static ENumberStatus checkNumber(ENumberStatus current, char next)
Check if a string literal with a given number status continues to be a valid number if next is append...
Abstract base class for different kinds of events.