Belle II Software development
ECLChargedPIDMVAWeights.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10
11// FRAMEWORK
12#include <framework/logging/Logger.h>
13
14// MVA
15#include <mva/interface/Weightfile.h>
16
17// ROOT
18#include <TObject.h>
19#include <TF1.h>
20#include <TH1.h>
21#include <TParameter.h>
22
23//BOOST
24#include <boost/algorithm/string/predicate.hpp>
25
26// C++
27#include <unordered_map>
28#include <algorithm>
29
30namespace Belle2 {
39 class ECLChargedPIDPhasespaceBinning : public TObject {
40
41 public:
42
47
52 ECLChargedPIDPhasespaceBinning(const std::vector<std::vector<float>> binEdges)
53 {
54 m_binEdges = binEdges;
55 for (auto dimensionBinEdges : binEdges) {
56 m_nBins.push_back(dimensionBinEdges.size() - 1);
57 }
58 }
59
64
69 int getLinearisedBinIndex(const std::vector<float> values)
70 {
71 int globalBin(-1);
72 std::vector<int> binIndices = getBinIndices(values);
73 for (unsigned int i = 0; i < binIndices.size(); i++) {
74 if (binIndices[i] < 0) return -1;
75 if (i == 0) {
76 globalBin = binIndices[i];
77 } else {
78 globalBin = globalBin * m_nBins[i] + binIndices[i];
79 }
80 }
81 return globalBin;
82 }
83
84 private:
85
91 std::vector<int> getBinIndices(const std::vector<float> values)
92 {
93 std::vector<int> binIndices(m_binEdges.size());
94
95 for (unsigned int i = 0; i < m_binEdges.size(); i++) {
96 std::vector<float> dimBinEdges = m_binEdges[i];
97 auto it = std::upper_bound(dimBinEdges.begin(), dimBinEdges.end(), values[i]);
98 if (it == dimBinEdges.end()) {
99 binIndices[i] = -1;
100 } else {
101 int index = std::distance(dimBinEdges.begin(), it) - 1;
102 binIndices[i] = index;
103 }
104 }
105 return binIndices;
106 }
107
111 std::vector<std::vector<float>> m_binEdges;
112
116 std::vector<int> m_nBins;
117
118 // 1: First class implementation.
120 };
121
132 class ECLChargedPIDPhasespaceCategory : public TObject {
133
134 public:
136 enum class MVAResponseTransformMode : unsigned int {
138 c_LogTransform = 0,
149 };
150
155 m_log_transform_offset("logTransformOffset", 1e-15),
156 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
157 m_temperature("temperature", 1.0)
158 {};
159
167 ECLChargedPIDPhasespaceCategory(const std::string weightfilePath,
168 const MVAResponseTransformMode& mvaResponeTransformMode,
169 const std::unordered_map<unsigned int, unsigned int>& mvaIndexForHypothesis) :
170 m_log_transform_offset("logTransformOffset", 1e-15),
171 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
172 m_temperature("temperature", 1.0)
173 {
174 // Load and serialize the MVA::Weightfile object into a string for storage in the database,
175 // otherwise there are issues w/ dictionary generation for the payload class...
176 Belle2::MVA::Weightfile weightfile;
177 if (boost::ends_with(weightfilePath, ".root")) {
178 weightfile = Belle2::MVA::Weightfile::loadFromROOTFile(weightfilePath);
179 } else if (boost::ends_with(weightfilePath, ".xml")) {
180 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
181 } else {
182 B2WARNING("Unknown file extension for file: " << weightfilePath << ", fallback to xml...");
183 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
184 }
185 std::stringstream ss;
187
188 // store
189 m_weight = ss.str();
190 m_mvaResponseTransformMode = mvaResponeTransformMode;
191 m_mvaIndexForHypothesis = mvaIndexForHypothesis;
192 }
193
198
202 const std::string getSerialisedWeight() const {return m_weight;}
203
208
214 const TF1* getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
215 {
216 return &m_pdfs.at(iMVAResponse).at(hypoPDG);
217 }
218
224 const TH1F* getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
225 {
226 return &m_cdfs.at(iMVAResponse).at(hypoPDG);
227 }
228
233 const std::vector<float>* getDecorrelationMatrix(const int hypoPDG) const
234 {
235 return &m_decorrelationMatrices.at(hypoPDG);
236 }
237
242 void setCDFs(std::vector<std::unordered_map<unsigned int, TH1F>> cdfs) {m_cdfs = cdfs;}
243
244
249 void setPDFs(std::vector<std::unordered_map<unsigned int, TF1>>& pdfs) {m_pdfs = pdfs;}
250
251
256 void setDecorrelationMatrixMap(std::unordered_map<unsigned int, std::vector<float>> decorrelationMatrices)
257 {
258 m_decorrelationMatrices = decorrelationMatrices;
259 }
260
264 void setlogTransformOffset(const float& offset)
265 {
266 m_log_transform_offset.SetVal(offset);
267 }
268
273 {
274 return m_log_transform_offset.GetVal();
275 }
276
280 void setTemperature(const float& temperature)
281 {
282 m_temperature.SetVal(temperature);
283 }
284
288 float getTemperature() const
289 {
290 return m_temperature.GetVal();
291 }
292
296 void setMaxPossibleResponseValue(const float& offset)
297 {
298 m_max_possible_response_value.SetVal(offset);
299 }
300
305 {
306 return m_max_possible_response_value.GetVal();
307 }
308
315 unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
316 {
317 return m_mvaIndexForHypothesis.at(hypoPDG);
318 }
319
320 private:
321
322 TParameter<float> m_log_transform_offset;
324 TParameter<float> m_temperature;
329 std::string m_weight;
330
335
340 std::vector<std::unordered_map<unsigned int, TF1>> m_pdfs;
341
346 std::unordered_map<unsigned int, unsigned int> m_mvaIndexForHypothesis;
347
353 std::vector<std::unordered_map<unsigned int, TH1F>> m_cdfs;
354
359 std::unordered_map<unsigned int, std::vector<float>> m_decorrelationMatrices;
360
361 // 1: First class implementation.
363 };
364
369 class ECLChargedPIDMVAWeights : public TObject {
370 public:
375
380
381
388
395 void storeMVAWeights(std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>& phasespaceCategories)
396 {
397 m_phasespaceCategories = phasespaceCategories;
398 }
399
404 const ECLChargedPIDPhasespaceCategory* getPhasespaceCategory(const unsigned int idx) const {return &m_phasespaceCategories.at(idx);}
405
409 const std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>* getPhasespaceCategories() const {return &m_phasespaceCategories;}
410
415 bool isPhasespaceCovered(const int linearBinIndex) const
416 {
417 // if the vector of values passed falls outside the defined phasespace.
418 if (linearBinIndex < 0) return false;
419 // if the vector is within the defined phasespace but we do not provide an ECLChargedPIDPhasespaceCategory object for this bin.
420 if (m_phasespaceCategories.count(linearBinIndex) == 0) return false;
421 return true;
422 }
423
429 unsigned int getLinearisedCategoryIndex(std::vector<float> values) const
430 {
431 if (!m_categories) {
432 B2FATAL("No N dimensional grid was found in the ECLChargedPIDMVA DB payload. This should not happen! Abort...");
433 }
434 return m_categories->getLinearisedBinIndex(values);
435 }
436
440 std::vector<std::string> getBinningVariables() const {return m_binningVariables;}
441
446 void setBinningVariables(std::vector<std::string>& binningVariables) {m_binningVariables = binningVariables;}
447 private:
453
457 std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory> m_phasespaceCategories;
458
462 std::vector<std::string> m_binningVariables;
463
464 // 1: First class implementation.
467 }; // class ECLChargedPIDMVAWeights
469} // Belle 2 Namespace
Class to contain payload of everything needed for MVA based charged particle identification.
const ECLChargedPIDPhasespaceCategory * getPhasespaceCategory(const unsigned int idx) const
Returns the ith ECLChargedPIDPhasespaceCategory.
ECLChargedPIDMVAWeights()
Default constructor, necessary for ROOT to stream the object.
void setBinningVariables(std::vector< std::string > &binningVariables)
Set string definitions of the variables used in defining the phasespace categories.
std::vector< std::string > m_binningVariables
Stores the list of variables used to define the phasespace binning.
bool isPhasespaceCovered(const int linearBinIndex) const
Returns bool whether or not the given values are within the phasespace covered by the trainings in th...
ECLChargedPIDPhasespaceBinning * m_categories
An N Dimensional binning whose bins define the boundaries of the categories for which the training is...
const std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > * getPhasespaceCategories() const
Returns the map of phasespaceCategories.
std::vector< std::string > getBinningVariables() const
Returns string definitions of the variables used in defining the phasespace categories.
unsigned int getLinearisedCategoryIndex(std::vector< float > values) const
Returns the flattened 1D index of the N dimensional phasespace category grid.
void setWeightCategories(ECLChargedPIDPhasespaceBinning *h)
Set the N dimensional grid representing the categories for which weightfiles are defined.
ClassDef(ECLChargedPIDMVAWeights, 1)
ClassDef
std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > m_phasespaceCategories
Stores the ECLChargedPIDPhasespaceCategory object for all the N dimensional categories.
void storeMVAWeights(std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > &phasespaceCategories)
Store the ECLChargedPIDPhasespaceCategory objects into the payload.
Class to store the N dimensional phasespace binning of the MVA categorical training.
std::vector< int > m_nBins
Vector of number of bins per dimension.
std::vector< int > getBinIndices(const std::vector< float > values)
Maps the vector of input values to their bin index in N dimensions.
int getLinearisedBinIndex(const std::vector< float > values)
Maps the vector of input values to a global bin index.
std::vector< std::vector< float > > m_binEdges
Vector of bin edges.
ClassDef(ECLChargedPIDPhasespaceBinning, 1)
ClassDef.
ECLChargedPIDPhasespaceBinning(const std::vector< std::vector< float > > binEdges)
Constructor.
Stores all required information for the ECLChargedPIDMVA for a phasespace category.
unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
Maps a charged stable pdg code to an index of the MVA response.
MVAResponseTransformMode m_mvaResponseTransformMode
Stores which transformation mode to apply to the mva responses.
const std::string getSerialisedWeight() const
Getter for serialised weightfile.
void setCDFs(std::vector< std::unordered_map< unsigned int, TH1F > > cdfs)
Set the cdfs.
TParameter< float > m_max_possible_response_value
Max possible value of the mva response.
void setPDFs(std::vector< std::unordered_map< unsigned int, TF1 > > &pdfs)
Set the pdfs.
void setMaxPossibleResponseValue(const float &offset)
Set the max possible response value, used in log transformation of the responses.
ECLChargedPIDPhasespaceCategory()
Default constructor, necessary for ROOT to stream the object.
std::vector< std::unordered_map< unsigned int, TH1F > > m_cdfs
CDFs for each mva return value for each hypothesis.
MVAResponseTransformMode getTransformMode() const
Getter for the MVA transform mode.
void setDecorrelationMatrixMap(std::unordered_map< unsigned int, std::vector< float > > decorrelationMatrices)
Set the decorrelation matrices.
float getMaxPossibleResponseValue() const
Get the max possible response value, used in log transformation of the responses.
const TF1 * getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
Getter for pdfs.
ClassDef(ECLChargedPIDPhasespaceCategory, 1)
ClassDef.
std::unordered_map< unsigned int, std::vector< float > > m_decorrelationMatrices
Decorrelation matrices.
std::unordered_map< unsigned int, unsigned int > m_mvaIndexForHypothesis
Unordered map of abs(pdg_code) for the 6 charged stable hypotheses to index of the MVA response vecto...
void setTemperature(const float &temperature)
Set the temperature parameter used to calibrate the MVA.
ECLChargedPIDPhasespaceCategory(const std::string weightfilePath, const MVAResponseTransformMode &mvaResponeTransformMode, const std::unordered_map< unsigned int, unsigned int > &mvaIndexForHypothesis)
Useful constructor.
float getTemperature() const
Getter for the temperature.
TParameter< float > m_temperature
calibration factor for MVA responses.
const std::vector< float > * getDecorrelationMatrix(const int hypoPDG) const
Gets the decorrelation matrix for a given particle hypothesis.
std::vector< std::unordered_map< unsigned int, TF1 > > m_pdfs
A vector of unodered maps.
const TH1F * getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
Gets the cdf for the hypothesis pdg for a given response value.
TParameter< float > m_log_transform_offset
Small offset to avoid mva response values of 1.0 being log transformed to NaN.
std::string m_weight
Serialsed MVA weightfile.
float getLogTransformOffset() const
Getter for the log transform offset.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
@ c_DirectMVAResponse
Directly take the MVA response as the logL, useful if we train neural nets to learn the logL.
@ c_DecorrelationTransform
Decorrelation transform of the gaussian transformed mva responses.
@ c_GaussianTransform
Gaussian transform of the log transformed mva response.
void setlogTransformOffset(const float &offset)
Set the offset used in the log transformation to be consistent with the offset used when generating t...
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
Definition: Weightfile.cc:240
static Weightfile loadFromROOTFile(const std::string &filename)
Static function which loads a Weightfile from a ROOT file.
Definition: Weightfile.cc:217
static void saveToStream(Weightfile &weightfile, std::ostream &stream)
Static function which serializes a Weightfile to a stream.
Definition: Weightfile.cc:185
Abstract base class for different kinds of events.