12#include <framework/logging/Logger.h>
15#include <mva/interface/Weightfile.h>
21#include <TParameter.h>
24#include <boost/algorithm/string/predicate.hpp>
27#include <unordered_map>
55 for (
auto dimensionBinEdges : binEdges) {
56 m_nBins.push_back(dimensionBinEdges.size() - 1);
73 for (
unsigned int i = 0; i < binIndices.size(); i++) {
74 if (binIndices[i] < 0)
return -1;
76 globalBin = binIndices[i];
78 globalBin = globalBin *
m_nBins[i] + binIndices[i];
93 std::vector<int> binIndices(
m_binEdges.size());
95 for (
unsigned int i = 0; i <
m_binEdges.size(); i++) {
96 std::vector<float> dimBinEdges =
m_binEdges[i];
97 auto it = std::upper_bound(dimBinEdges.begin(), dimBinEdges.end(), values[i]);
98 if (it == dimBinEdges.end()) {
101 int index = std::distance(dimBinEdges.begin(), it) - 1;
102 binIndices[i] = index;
169 const std::unordered_map<unsigned int, unsigned int>& mvaIndexForHypothesis) :
177 if (boost::ends_with(weightfilePath,
".root")) {
179 }
else if (boost::ends_with(weightfilePath,
".xml")) {
182 B2WARNING(
"Unknown file extension for file: " << weightfilePath <<
", fallback to xml...");
185 std::stringstream ss;
214 const TF1*
getPDF(
const unsigned int iMVAResponse,
const unsigned int hypoPDG)
const
216 return &
m_pdfs.at(iMVAResponse).at(hypoPDG);
224 const TH1F*
getCDF(
const unsigned int iMVAResponse,
const int hypoPDG)
const
226 return &
m_cdfs.at(iMVAResponse).at(hypoPDG);
242 void setCDFs(std::vector<std::unordered_map<unsigned int, TH1F>> cdfs) {
m_cdfs = cdfs;}
249 void setPDFs(std::vector<std::unordered_map<unsigned int, TF1>>& pdfs) {
m_pdfs = pdfs;}
340 std::vector<std::unordered_map<unsigned int, TF1>>
m_pdfs;
353 std::vector<std::unordered_map<unsigned int, TH1F>>
m_cdfs;
395 void storeMVAWeights(std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>& phasespaceCategories)
418 if (linearBinIndex < 0)
return false;
432 B2FATAL(
"No N dimensional grid was found in the ECLChargedPIDMVA DB payload. This should not happen! Abort...");
Class to contain payload of everything needed for MVA based charged particle identification.
const ECLChargedPIDPhasespaceCategory * getPhasespaceCategory(const unsigned int idx) const
Returns the ith ECLChargedPIDPhasespaceCategory.
ECLChargedPIDMVAWeights()
Default constructor, necessary for ROOT to stream the object.
void setBinningVariables(std::vector< std::string > &binningVariables)
Set string definitions of the variables used in defining the phasespace categories.
std::vector< std::string > m_binningVariables
Stores the list of variables used to define the phasespace binning.
~ECLChargedPIDMVAWeights()
Destructor.
bool isPhasespaceCovered(const int linearBinIndex) const
Returns bool whether or not the given values are within the phasespace covered by the trainings in th...
ECLChargedPIDPhasespaceBinning * m_categories
An N Dimensional binning whose bins define the boundaries of the categories for which the training is...
const std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > * getPhasespaceCategories() const
Returns the map of phasespaceCategories.
std::vector< std::string > getBinningVariables() const
Returns string definitions of the variables used in defining the phasespace categories.
unsigned int getLinearisedCategoryIndex(std::vector< float > values) const
Returns the flattened 1D index of the N dimensional phasespace category grid.
void setWeightCategories(ECLChargedPIDPhasespaceBinning *h)
Set the N dimensional grid representing the categories for which weightfiles are defined.
ClassDef(ECLChargedPIDMVAWeights, 1)
ClassDef
std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > m_phasespaceCategories
Stores the ECLChargedPIDPhasespaceCategory object for all the N dimensional categories.
void storeMVAWeights(std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > &phasespaceCategories)
Store the ECLChargedPIDPhasespaceCategory objects into the payload.
Class to store the N dimensional phasespace binning of the MVA categorical training.
std::vector< int > m_nBins
Vector of number of bins per dimension.
std::vector< int > getBinIndices(const std::vector< float > values)
Maps the vector of input values to their bin index in N dimensions.
int getLinearisedBinIndex(const std::vector< float > values)
Maps the vector of input values to a global bin index.
std::vector< std::vector< float > > m_binEdges
Vector of bin edges.
ECLChargedPIDPhasespaceBinning()
Default Constructor.
ClassDef(ECLChargedPIDPhasespaceBinning, 1)
ClassDef.
~ECLChargedPIDPhasespaceBinning()
Destructor.
ECLChargedPIDPhasespaceBinning(const std::vector< std::vector< float > > binEdges)
Constructor.
Stores all required information for the ECLChargedPIDMVA for a phasespace category.
unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
Maps a charged stable pdg code to an index of the MVA response.
MVAResponseTransformMode m_mvaResponseTransformMode
Stores which transformation mode to apply to the mva responses.
~ECLChargedPIDPhasespaceCategory()
Destructor.
const std::string getSerialisedWeight() const
Getter for serialised weightfile.
void setCDFs(std::vector< std::unordered_map< unsigned int, TH1F > > cdfs)
Set the cdfs.
TParameter< float > m_max_possible_response_value
Max possible value of the mva response.
void setPDFs(std::vector< std::unordered_map< unsigned int, TF1 > > &pdfs)
Set the pdfs.
void setMaxPossibleResponseValue(const float &offset)
Set the max possible response value, used in log transformation of the responses.
ECLChargedPIDPhasespaceCategory()
Default constructor, necessary for ROOT to stream the object.
std::vector< std::unordered_map< unsigned int, TH1F > > m_cdfs
CDFs for each mva return value for each hypothesis.
MVAResponseTransformMode getTransformMode() const
Getter for the MVA transform mode.
void setDecorrelationMatrixMap(std::unordered_map< unsigned int, std::vector< float > > decorrelationMatrices)
Set the decorrelation matrices.
float getMaxPossibleResponseValue() const
Get the max possible response value, used in log transformation of the responses.
const TF1 * getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
Getter for pdfs.
ClassDef(ECLChargedPIDPhasespaceCategory, 1)
ClassDef.
std::unordered_map< unsigned int, std::vector< float > > m_decorrelationMatrices
Decorrelation matrices.
std::unordered_map< unsigned int, unsigned int > m_mvaIndexForHypothesis
Unordered map of abs(pdg_code) for the 6 charged stable hypotheses to index of the MVA response vecto...
void setTemperature(const float &temperature)
Set the temperature parameter used to calibrate the MVA.
ECLChargedPIDPhasespaceCategory(const std::string weightfilePath, const MVAResponseTransformMode &mvaResponeTransformMode, const std::unordered_map< unsigned int, unsigned int > &mvaIndexForHypothesis)
Useful constructor.
float getTemperature() const
Getter for the temperature.
TParameter< float > m_temperature
Calibration factor for MVA responses.
const std::vector< float > * getDecorrelationMatrix(const int hypoPDG) const
Gets the decorrelation matrix for a given particle hypothesis.
std::vector< std::unordered_map< unsigned int, TF1 > > m_pdfs
A vector of unordered maps.
const TH1F * getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
Gets the cdf for the hypothesis pdg for a given response value.
TParameter< float > m_log_transform_offset
Small offset to avoid mva response values of 1.0 being log transformed to NaN.
std::string m_weight
Serialised MVA weightfile.
float getLogTransformOffset() const
Getter for the log transform offset.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
@ c_LogTransformSingle
Log transform the mva responses.
@ c_LogMVAResponse
Take the log of the MVA response.
@ c_DirectMVAResponse
Directly take the MVA response as the logL, useful if we train neural nets to learn the logL.
@ c_LogTransform
Log transform the mva responses.
@ c_DecorrelationTransform
Decorrelation transform of the gaussian transformed mva responses.
@ c_GaussianTransform
Gaussian transform of the log transformed mva response.
void setlogTransformOffset(const float &offset)
Set the offset used in the log transformation to be consistent with the offset used when generating t...
The Weightfile class serializes all information about a training into an xml tree.
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from an XML file.
static Weightfile loadFromROOTFile(const std::string &filename)
Static function which loads a Weightfile from a ROOT file.
static void saveToStream(Weightfile &weightfile, std::ostream &stream)
Static function which serializes a Weightfile to a stream.
Abstract base class for different kinds of events.