release-06-02-00/doxygen/FormulaParser_8cc_source.html

 /**************************************************************************

  * basf2 (Belle II Analysis Software Framework)                           *

  * Author: The Belle II Collaboration                                     *

  *                                                                        *

  * See git log for contributors and copyright holders.                    *

  * This file is licensed under LGPL-3.0, see LICENSE.md.                  *

  **************************************************************************/


 #include <framework/utilities/FormulaParser.h>

 #include <stdexcept>

 #include <cmath>


 namespace Belle2 {

   // Translate an operator code into the single character representing it.
   // Values which are not handled by the switch yield 0.
   char FormulaParserBase::operatorToChar(EOperator op) noexcept
   {
     // fallback for anything the switch below does not know about
     char symbol{0};
     switch (op) {
       case EOperator::c_noop:
         symbol = ' ';
         break;
       case EOperator::c_plus:
         symbol = '+';
         break;
       case EOperator::c_minus:
         symbol = '-';
         break;
       case EOperator::c_multiply:
         symbol = '*';
         break;
       case EOperator::c_divide:
         symbol = '/';
         break;
       case EOperator::c_power:
         symbol = '^';
         break;
       case EOperator::c_roundBracketOpen:
         symbol = '(';
         break;
       case EOperator::c_roundBracketClose:
         symbol = ')';
         break;
       case EOperator::c_squareBracketOpen:
         symbol = '[';
         break;
       case EOperator::c_squareBracketClose:
         symbol = ']';
         break;
     }
     return symbol;
   }


   // Evaluate the binary operator `op` on the operands `a` (left) and `b` (right).
   // Throws std::runtime_error for anything that is not an arithmetic operator
   // (no-op and brackets).
   double FormulaParserBase::applyOperator(EOperator op, double a, double b)
   {
     if (op == EOperator::c_plus) return a + b;
     if (op == EOperator::c_minus) return a - b;
     if (op == EOperator::c_multiply) return a * b;
     if (op == EOperator::c_divide) return a / b;
     if (op == EOperator::c_power) return std::pow(a, b);
     // everything else cannot be applied to two numbers
     throw std::runtime_error(std::string("Cannot apply operator ") + operatorToChar(op));
   }


   // One step of the state machine which decides whether a string is a valid
   // float literal: given the status of the characters seen so far (`current`)
   // return the status after appending the character `next`.
   // Once the status is c_Invalid it stays c_Invalid.
   auto FormulaParserBase::checkNumber(ENumberStatus current, char next) -> ENumberStatus {
     // std::isdigit is only defined for values representable as unsigned char
     // (or EOF), so a plain char holding e.g. a UTF-8 byte must be converted first
     const bool isDigit = std::isdigit(static_cast<unsigned char>(next)) != 0;
     const bool isDot = next == '.';
     const bool isSign = next == '+' or next == '-';
     const bool isExponent = next == 'E' or next == 'e';

     switch (current)
     {
       case ENumberStatus::c_Invalid:
         // invalid stays invalid
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Empty:
         // numbers are allowed to start with digits, a dot or a sign
         if (isDigit) return ENumberStatus::c_Int;
         if (isDot) return ENumberStatus::c_LeadingDot;
         if (isSign) return ENumberStatus::c_Sign;
         // everything else we don't like
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Sign:
         // after a sign only digits or a dot may follow. There were no digits
         // yet, so the dot is a leading dot: it must be followed by digits and
         // not directly by an exponent ("-.e5" is not a number)
         if (isDigit) return ENumberStatus::c_Int;
         if (isDot) return ENumberStatus::c_LeadingDot;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Int:
         // so far [sign +] digits: more digits, a dot or an exponent may follow
         if (isDigit) return ENumberStatus::c_Int;
         if (isDot) return ENumberStatus::c_Dot;
         if (isExponent) return ENumberStatus::c_Exponent;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Dot:
         // digits followed by a dot: more digits or an exponent may follow
         if (isDigit) return ENumberStatus::c_Float;
         if (isExponent) return ENumberStatus::c_Exponent;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_LeadingDot:
         // dot without any digits before it: only digits may follow
         if (isDigit) return ENumberStatus::c_Float;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Float:
         // digits after the dot: more digits or an exponent may follow
         if (isDigit) return ENumberStatus::c_Float;
         if (isExponent) return ENumberStatus::c_Exponent;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_Exponent:
         // directly after the exponent marker: digits or a sign
         if (isDigit) return ENumberStatus::c_Scientific;
         if (isSign) return ENumberStatus::c_ExponentSign;
         return ENumberStatus::c_Invalid;
       case ENumberStatus::c_ExponentSign:
       case ENumberStatus::c_Scientific:
         // after the exponent sign and any digit thereafter only digits are possible
         if (isDigit) return ENumberStatus::c_Scientific;
         return ENumberStatus::c_Invalid;
     }
     // unknown status value
     return ENumberStatus::c_Invalid;
   }


   // Verify that an operand stack of the given size holds enough operands for
   // a binary operator, throw std::runtime_error otherwise.
   void FormulaParserBase::assertOperatorUsable(size_t stacksize)
   {
     // all operators are binary: two or more operands is all we need
     if (stacksize >= 2) return;
     // an empty stack gets its own message, a single operand means one is missing
     const char* reason = (stacksize == 0)
                          ? "could not parse, stack of operands empty. Please report, this is most likely a bug"
                          : "Missing operand";
     throw std::runtime_error(reason);
   }


   // Handle a newly found operator or bracket: pending operators on the
   // operator stack are handed to executeOperator() according to brackets,
   // precedence and associativity, then the new operator is pushed.
   // Throws std::runtime_error on misplaced operators or mismatched brackets.
   void FormulaParserBase::addOperator(EOperator op)
   {
     const auto isOpeningBracket = [](EOperator candidate) {
       return candidate == EOperator::c_squareBracketOpen or candidate == EOperator::c_roundBracketOpen;
     };
     // the precedence is encoded in the bits above the lowest four
     const auto precedenceOf = [](EOperator candidate) {
       return static_cast<int>(candidate) >> 4;
     };

     // Opening brackets just go on the stack ...
     if (isOpeningBracket(op)) {
       // ... unless the previous token was a variable, then an operator is missing in between
       if (!m_lastTokenWasOperator) throw std::runtime_error("missing operator");
       m_operatorStack.push(op);
       return;
     }

     // Closing brackets: execute everything up to the matching opening bracket
     const bool closesSquare = (op == EOperator::c_squareBracketClose);
     if (closesSquare or op == EOperator::c_roundBracketClose) {
       const EOperator expected = closesSquare ? EOperator::c_squareBracketOpen : EOperator::c_roundBracketOpen;
       const EOperator mismatched = closesSquare ? EOperator::c_roundBracketOpen : EOperator::c_squareBracketOpen;
       while (!m_operatorStack.empty()) {
         const EOperator pending = m_operatorStack.top();
         m_operatorStack.pop();
         if (pending == mismatched) throw std::runtime_error("wrong type of closing bracket");
         if (pending == expected) return;
         executeOperator(pending);
       }
       // ran out of operators without seeing the opening bracket
       throw std::runtime_error("unmatched bracket");
     }

     // Ordinary operator: two of them in a row are not allowed
     if (m_lastTokenWasOperator) throw std::runtime_error("missing operand before operator");
     m_lastTokenWasOperator = true;

     const int newPrecedence = precedenceOf(op);
     // only the power operator is right associative, everything else is left associative
     const bool newIsRightAssociative = (op == EOperator::c_power);
     while (!m_operatorStack.empty()) {
       const EOperator pending = m_operatorStack.top();
       // never look beyond an opening bracket
       if (isOpeningBracket(pending)) break;
       const int pendingPrecedence = precedenceOf(pending);
       // keep the pending operator on the stack if it binds weaker than the
       // new one, or equally strong while the new one is right associative
       if (pendingPrecedence < newPrecedence) break;
       if (pendingPrecedence == newPrecedence and newIsRightAssociative) break;
       // otherwise execute it and take it off the stack
       executeOperator(pending);
       m_operatorStack.pop();
     }
     m_operatorStack.push(op);
   }


   void FormulaParserBase::flushPendingOperators()

   {

     while (!m_operatorStack.empty()) {

       EOperator op = m_operatorStack.top();

       m_operatorStack.pop();

       // found a bracket but no more closing brackets to come ... so error

       if (op == EOperator::c_squareBracketOpen or op == EOperator::c_roundBracketOpen)

         throw std::runtime_error("missing closing bracket");

       executeOperator(op);

     }

   }


   // Finish the token collected in m_currentVariableName (if any) and hand it
   // to addVariable(), either as a number or as a name. Afterwards the name
   // buffer is empty and the number status is reset to c_Empty.
   // Throws std::runtime_error if the previous token was not an operator.
   void FormulaParserBase::flushCurrentVariable()
   {
     if (!m_currentVariableName.empty()) {
       if (!m_lastTokenWasOperator) throw std::runtime_error("Missing operator before variable");
       m_lastTokenWasOperator = false;

       bool addedAsNumber{false};
       if (m_currentVariableNameNumberStatus != ENumberStatus::c_Invalid) {
         // Looks like a number so far, but the status may also be an
         // unfinished literal such as "1e", "1e+" or a lone ".". Only accept
         // it as a number if the conversion consumed the complete token.
         const char* begin = m_currentVariableName.c_str();
         char* end{nullptr};
         const double value = std::strtod(begin, &end);
         if (end == begin + m_currentVariableName.size()) {
           addVariable(InputToken(value));
           addedAsNumber = true;
         }
       }
       // not (completely) a number: pass it on by name
       if (!addedAsNumber) addVariable(InputToken(m_currentVariableName));
     }
     m_currentVariableName.clear();
     m_currentVariableNameNumberStatus = ENumberStatus::c_Empty;
   }


   // Decide whether the character `next` is an operator or bracket and return
   // its code, or c_noop if it is not. May look at the following character in
   // m_buffer and, for "**", consume it.
   auto FormulaParserBase::checkForOperator(char next) -> EOperator {
     switch (next)
     {
       case '+':
       case '-': {
         // Signs can also be part of a float literal (leading sign or exponent
         // sign). If the literal is still valid with the sign and with the
         // character after it, the sign belongs to the literal.
         const auto withSign = checkNumber(m_currentVariableNameNumberStatus, next);
         if (withSign != ENumberStatus::c_Invalid and checkNumber(withSign, m_buffer.peek()) != ENumberStatus::c_Invalid) {
           return EOperator::c_noop;
         }
         return (next == '+') ? EOperator::c_plus : EOperator::c_minus;
       }
       case '/':
         return EOperator::c_divide;
       case '^':
         return EOperator::c_power;
       case '*':
         // python style "**" means power: swallow the second star
         if (m_buffer.peek() == '*') {
           m_buffer.get();
           return EOperator::c_power;
         }
         // a single star is a multiplication
         return EOperator::c_multiply;
       case '(':
         return EOperator::c_roundBracketOpen;
       case ')':
         return EOperator::c_roundBracketClose;
       case '[':
         return EOperator::c_squareBracketOpen;
       case ']':
         return EOperator::c_squareBracketClose;
       default:
         // not an operator
         return EOperator::c_noop;
     }
   }


   // Parse `formula` character by character: identifiers and numbers are
   // collected and passed on via flushCurrentVariable(), operators and brackets
   // via addOperator(). Text in round brackets directly following an
   // identifier is kept as part of that identifier.
   // Throws std::runtime_error if the formula cannot be parsed.
   void FormulaParserBase::processString(const std::string& formula)
   {
     // Start from a clean state: new buffer, empty operator stack, no identifier
     m_buffer = std::istringstream(formula);
     std::stack<EOperator>().swap(m_operatorStack);
     m_currentVariableName.clear();
     m_lastTokenWasOperator = true;
     // the name is empty at this point so this only resets the number status
     flushCurrentVariable();

     // number of currently open "(" inside the arguments of a variable
     int argumentDepth{0};
     char current;
     while (m_buffer.get(current)) {
       if (argumentDepth > 0) {
         // Inside variable(...) arguments: take everything verbatim and only
         // keep track of the round brackets
         m_currentVariableName += current;
         if (current == '(') {
           ++argumentDepth;
         } else if (current == ')') {
           --argumentDepth;
         }
         // last bracket closed, the variable is complete
         if (argumentDepth == 0) flushCurrentVariable();
       } else if (current == '(' and not m_currentVariableName.empty()) {
         // "(" directly after an identifier starts its arguments
         m_currentVariableName += current;
         ++argumentDepth;
         // something with arguments is never a number
         m_currentVariableNameNumberStatus = ENumberStatus::c_Invalid;
       } else if (const auto opcode = checkForOperator(current); opcode != EOperator::c_noop) {
         // an operator ends the current identifier
         flushCurrentVariable();
         addOperator(opcode);
       } else if (current == ' ' or current == '\n' or current == '\t' or current == '\r') {
         // whitespace ends the current identifier and is otherwise ignored
         flushCurrentVariable();
       } else {
         // part of an identifier or float literal: update the number status
         // first, then remember the character
         m_currentVariableNameNumberStatus = checkNumber(m_currentVariableNameNumberStatus, current);
         m_currentVariableName += current;
       }
     }
     if (argumentDepth > 0) throw std::runtime_error("unterminated variable arguments");
     // End of input: finish the last identifier and all pending operators
     flushCurrentVariable();
     flushPendingOperators();
   }


   // Build a detailed error message from `e`: the position where parsing
   // stopped, the reason, and the formula itself with an arrow below the line
   // containing that position. Always throws a new std::runtime_error with
   // that message.
   void FormulaParserBase::raiseError(const std::runtime_error& e)
   {
     // Where did the parser stop? -1 means we are already past the end
     auto errorPos = m_buffer.tellg();
     if (errorPos == -1) errorPos = m_buffer.str().size() + 1;

     std::ostringstream report;
     // first line: position and reason
     report << "Error parsing formula at character " << errorPos << ": " << e.what() << '\n';

     // then the formula, line by line as it may span multiple lines
     std::istringstream lines(m_buffer.str());
     long lineStart = 0;
     bool markerPrinted{false};
     std::string text;
     while (std::getline(lines, text)) {
       report << "   " << text << '\n';
       // position in the formula after this line, -1 after the last line
       auto lineEnd = lines.tellg();
       // the arrow goes below the first line ending at or after the error
       // position, or below the last line
       const bool reachedError = (lineEnd == -1 || lineEnd >= errorPos);
       if (reachedError && !markerPrinted) {
         // dashes from the start of the line up to the error position
         long column = lineStart - 1;
         while (column < errorPos) {
           report << "-";
           ++column;
         }
         report << "^" << '\n';
         // one arrow is enough
         markerPrinted = true;
       }
       lineStart = lineEnd;
     }
     throw std::runtime_error(report.str());
   }

 }

Belle2::FormulaParserBase::m_currentVariableName
std::string m_currentVariableName
collect characters into a variable name
Definition: FormulaParser.h:105

Belle2::FormulaParserBase::executeOperator
virtual void executeOperator(EOperator op)=0
Execute an operator on the current state.

Belle2::FormulaParserBase::addVariable
virtual void addVariable(const InputToken &token)=0
Add a variable token to the current state.

Belle2::FormulaParserBase::m_operatorStack
std::stack< EOperator > m_operatorStack
Stack of operators for the Shunting-yard algorithm.
Definition: FormulaParser.h:109

Belle2::FormulaParserBase::EOperator
EOperator
List of known operators.
Definition: FormulaParser.h:31

Belle2::FormulaParserBase::EOperator::c_multiply
@ c_multiply
Multiply.

Belle2::FormulaParserBase::EOperator::c_power
@ c_power
Exponentiation.

Belle2::FormulaParserBase::EOperator::c_roundBracketClose
@ c_roundBracketClose
Close round bracket.

Belle2::FormulaParserBase::EOperator::c_divide
@ c_divide
Division.

Belle2::FormulaParserBase::EOperator::c_plus
@ c_plus
Addition.

Belle2::FormulaParserBase::EOperator::c_minus
@ c_minus
Subtraction.

Belle2::FormulaParserBase::EOperator::c_roundBracketOpen
@ c_roundBracketOpen
Open round bracket.

Belle2::FormulaParserBase::EOperator::c_squareBracketOpen
@ c_squareBracketOpen
Open square bracket.

Belle2::FormulaParserBase::EOperator::c_noop
@ c_noop
No operation.

Belle2::FormulaParserBase::EOperator::c_squareBracketClose
@ c_squareBracketClose
Close square bracket.

Belle2::FormulaParserBase::InputToken
std::variant< std::string, double > InputToken
Input token type: an input token is either a string or a float variable.
Definition: FormulaParser.h:59

Belle2::FormulaParserBase::m_buffer
std::istringstream m_buffer
Buffer for the formula.
Definition: FormulaParser.h:103

Belle2::FormulaParserBase::ENumberStatus
ENumberStatus
States of a string literal when checking if it is a valid number.
Definition: FormulaParser.h:45

Belle2::FormulaParserBase::ENumberStatus::c_Invalid
@ c_Invalid
Not a valid number.

Belle2::FormulaParserBase::ENumberStatus::c_Empty
@ c_Empty
Empty string.

Belle2::FormulaParserBase::m_currentVariableNameNumberStatus
ENumberStatus m_currentVariableNameNumberStatus
State of the current variable name being a valid float literal.
Definition: FormulaParser.h:107

Belle2::FormulaParserBase::m_lastTokenWasOperator
bool m_lastTokenWasOperator
Bool to check whether there were consecutive operators or variables.
Definition: FormulaParser.h:101

Belle2::FormulaParserBase::checkForOperator
EOperator checkForOperator(char next)
Check if the next character is an operator.
Definition: FormulaParser.cc:192

Belle2::FormulaParserBase::flushPendingOperators
void flushPendingOperators()
Flush all pending operators at the end of processing.
Definition: FormulaParser.cc:161

Belle2::FormulaParserBase::addOperator
void addOperator(EOperator op)
Add an operator to the internal state, convert them to reverse polish notation using the shunting yar...
Definition: FormulaParser.cc:111

Belle2::FormulaParserBase::processString
void processString(const std::string &formula)
Process the given formula and store the final state.
Definition: FormulaParser.cc:226

Belle2::FormulaParserBase::applyOperator
static double applyOperator(EOperator op, double a, double b)
Apply operator on two values.
Definition: FormulaParser.cc:36

Belle2::FormulaParserBase::assertOperatorUsable
static void assertOperatorUsable(size_t stacksize)
Make sure we have enough operands to use an operator.
Definition: FormulaParser.cc:102

Belle2::FormulaParserBase::raiseError
void raiseError(const std::runtime_error &e)
Format the given runtime_error with context information and rethrow a new one.
Definition: FormulaParser.cc:295

Belle2::FormulaParserBase::flushCurrentVariable
void flushCurrentVariable()
Flush the currently parsed variable name and add it to the state either as variable or number.
Definition: FormulaParser.cc:173

Belle2::FormulaParserBase::operatorToChar
static char operatorToChar(EOperator op) noexcept
Convert operator code to character.
Definition: FormulaParser.cc:19

Belle2::FormulaParserBase::checkNumber
static ENumberStatus checkNumber(ENumberStatus current, char next)
Check if a string literal with a given number status continues to be a valid number if next is append...
Definition: FormulaParser.cc:49

Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:17