Belle II Software development
ECLChargedPIDMVAWeights.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10
11// FRAMEWORK
12#include <framework/logging/Logger.h>
13
14// MVA
15#include <mva/interface/Weightfile.h>
16
17// ROOT
18#include <TObject.h>
19#include <TF1.h>
20#include <TH1.h>
21#include <TParameter.h>
22
23//BOOST
24#include <boost/algorithm/string/predicate.hpp>
25
26// C++
27#include <unordered_map>
28#include <algorithm>
29
30namespace Belle2 {
35
39 class ECLChargedPIDPhasespaceBinning : public TObject {
40
41 public:
42
47
52 ECLChargedPIDPhasespaceBinning(const std::vector<std::vector<float>> binEdges)
53 {
54 m_binEdges = binEdges;
55 for (auto dimensionBinEdges : binEdges) {
56 m_nBins.push_back(dimensionBinEdges.size() - 1);
57 }
58 }
59
64
69 int getLinearisedBinIndex(const std::vector<float> values)
70 {
71 int globalBin(-1);
72 std::vector<int> binIndices = getBinIndices(values);
73 for (unsigned int i = 0; i < binIndices.size(); i++) {
74 if (binIndices[i] < 0) return -1;
75 if (i == 0) {
76 globalBin = binIndices[i];
77 } else {
78 globalBin = globalBin * m_nBins[i] + binIndices[i];
79 }
80 }
81 return globalBin;
82 }
83
84 private:
85
91 std::vector<int> getBinIndices(const std::vector<float> values)
92 {
93 std::vector<int> binIndices(m_binEdges.size());
94
95 for (unsigned int i = 0; i < m_binEdges.size(); i++) {
96 std::vector<float> dimBinEdges = m_binEdges[i];
97 auto it = std::upper_bound(dimBinEdges.begin(), dimBinEdges.end(), values[i]);
98 if (it == dimBinEdges.end()) {
99 binIndices[i] = -1;
100 } else {
101 int index = std::distance(dimBinEdges.begin(), it) - 1;
102 binIndices[i] = index;
103 }
104 }
105 return binIndices;
106 }
107
111 std::vector<std::vector<float>> m_binEdges;
112
116 std::vector<int> m_nBins;
117
118 // 1: First class implementation.
120 };
121
132 class ECLChargedPIDPhasespaceCategory : public TObject {
133
134 public:
150
155 m_log_transform_offset("logTransformOffset", 1e-15),
156 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
157 m_temperature("temperature", 1.0)
158 {};
159
167 ECLChargedPIDPhasespaceCategory(const std::string weightfilePath,
168 const MVAResponseTransformMode& mvaResponeTransformMode,
169 const std::unordered_map<unsigned int, unsigned int>& mvaIndexForHypothesis) :
170 m_log_transform_offset("logTransformOffset", 1e-15),
171 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
172 m_temperature("temperature", 1.0)
173 {
174 // Load and serialize the MVA::Weightfile object into a string for storage in the database,
175 // otherwise there are issues w/ dictionary generation for the payload class...
176 Belle2::MVA::Weightfile weightfile;
177 if (boost::ends_with(weightfilePath, ".root")) {
178 weightfile = Belle2::MVA::Weightfile::loadFromROOTFile(weightfilePath);
179 } else if (boost::ends_with(weightfilePath, ".xml")) {
180 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
181 } else {
182 B2WARNING("Unknown file extension for file: " << weightfilePath << ", fallback to xml...");
183 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
184 }
185 std::stringstream ss;
187
188 // store
189 m_weight = ss.str();
190 m_mvaResponseTransformMode = mvaResponeTransformMode;
191 m_mvaIndexForHypothesis = mvaIndexForHypothesis;
192 }
193
198
202 const std::string getSerialisedWeight() const {return m_weight;}
203
207 void setSerialisedWeight(std::string weight) { m_weight = weight; }
208
213
219 const TF1* getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
220 {
221 return &m_pdfs.at(iMVAResponse).at(hypoPDG);
222 }
223
229 const TH1F* getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
230 {
231 return &m_cdfs.at(iMVAResponse).at(hypoPDG);
232 }
233
238 const std::vector<float>* getDecorrelationMatrix(const int hypoPDG) const
239 {
240 return &m_decorrelationMatrices.at(hypoPDG);
241 }
242
247 void setCDFs(std::vector<std::unordered_map<unsigned int, TH1F>> cdfs) {m_cdfs = cdfs;}
248
249
254 void setPDFs(std::vector<std::unordered_map<unsigned int, TF1>>& pdfs) {m_pdfs = pdfs;}
255
256
261 void setDecorrelationMatrixMap(std::unordered_map<unsigned int, std::vector<float>> decorrelationMatrices)
262 {
263 m_decorrelationMatrices = decorrelationMatrices;
264 }
265
269 void setlogTransformOffset(const float& offset)
270 {
271 m_log_transform_offset.SetVal(offset);
272 }
273
278 {
279 return m_log_transform_offset.GetVal();
280 }
281
285 void setTemperature(const float& temperature)
286 {
287 m_temperature.SetVal(temperature);
288 }
289
293 float getTemperature() const
294 {
295 return m_temperature.GetVal();
296 }
297
301 void setMaxPossibleResponseValue(const float& offset)
302 {
303 m_max_possible_response_value.SetVal(offset);
304 }
305
310 {
311 return m_max_possible_response_value.GetVal();
312 }
313
320 unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
321 {
322 return m_mvaIndexForHypothesis.at(hypoPDG);
323 }
324
325 private:
326
327 TParameter<float> m_log_transform_offset;
329 TParameter<float> m_temperature;
330
334 std::string m_weight;
335
340
345 std::vector<std::unordered_map<unsigned int, TF1>> m_pdfs;
346
351 std::unordered_map<unsigned int, unsigned int> m_mvaIndexForHypothesis;
352
358 std::vector<std::unordered_map<unsigned int, TH1F>> m_cdfs;
359
364 std::unordered_map<unsigned int, std::vector<float>> m_decorrelationMatrices;
365
366 // 1: First class implementation.
368 };
369
374 class ECLChargedPIDMVAWeights : public TObject {
375 public:
380
385
386
393
400 void storeMVAWeights(std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>& phasespaceCategories)
401 {
402 m_phasespaceCategories = phasespaceCategories;
403 }
404
409 const ECLChargedPIDPhasespaceCategory* getPhasespaceCategory(const unsigned int idx) const {return &m_phasespaceCategories.at(idx);}
410
414 const std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>* getPhasespaceCategories() const {return &m_phasespaceCategories;}
415
420 bool isPhasespaceCovered(const int linearBinIndex) const
421 {
422 // if the vector of values passed falls outside the defined phasespace.
423 if (linearBinIndex < 0) return false;
424 // if the vector is within the defined phasespace but we do not provide an ECLChargedPIDPhasespaceCategory object for this bin.
425 if (m_phasespaceCategories.count(linearBinIndex) == 0) return false;
426 return true;
427 }
428
433
434 unsigned int getLinearisedCategoryIndex(std::vector<float> values) const
435 {
436 if (!m_categories) {
437 B2FATAL("No N dimensional grid was found in the ECLChargedPIDMVA DB payload. This should not happen! Abort...");
438 }
439 return m_categories->getLinearisedBinIndex(values);
440 }
441
445 std::vector<std::string> getBinningVariables() const {return m_binningVariables;}
446
451 void setBinningVariables(std::vector<std::string>& binningVariables) {m_binningVariables = binningVariables;}
452 private:
458
462 std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory> m_phasespaceCategories;
463
467 std::vector<std::string> m_binningVariables;
468
469 // 1: First class implementation.
471
472 }; // class ECLChargedPIDMVAWeights
473
474} // Belle 2 Namespace
const ECLChargedPIDPhasespaceCategory * getPhasespaceCategory(const unsigned int idx) const
Returns the ith ECLChargedPIDPhasespaceCategory.
ECLChargedPIDMVAWeights()
Default constructor, necessary for ROOT to stream the object.
void setBinningVariables(std::vector< std::string > &binningVariables)
Set string definitions of the variables used in defining the phasespace categories.
std::vector< std::string > m_binningVariables
Stores the list of variables used to define the phasespace binning.
bool isPhasespaceCovered(const int linearBinIndex) const
Returns bool whether or not the given values are within the phasespace covered by the trainings in th...
ECLChargedPIDPhasespaceBinning * m_categories
An N Dimensional binning whose bins define the boundaries of the categories for which the training is...
const std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > * getPhasespaceCategories() const
Returns the map of phasespaceCategories.
std::vector< std::string > getBinningVariables() const
Returns string definitions of the variables used in defining the phasespace categories.
unsigned int getLinearisedCategoryIndex(std::vector< float > values) const
Returns the flattened 1D index of the N dimensional phasespace category grid.
void setWeightCategories(ECLChargedPIDPhasespaceBinning *h)
Set the N dimensional grid representing the categories for which weightfiles are defined.
ClassDef(ECLChargedPIDMVAWeights, 1)
ClassDef.
std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > m_phasespaceCategories
Stores the ECLChargedPIDPhasespaceCategory object for all the N dimensional categories.
void storeMVAWeights(std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > &phasespaceCategories)
Store the ECLChargedPIDPhasespaceCategory objects into the payload.
Class to store the N dimensional phasespace binning of the MVA categorical training.
std::vector< int > m_nBins
Vector of number of bins per dimension.
std::vector< int > getBinIndices(const std::vector< float > values)
Maps the vector of input values to their bin index in N dimensions.
int getLinearisedBinIndex(const std::vector< float > values)
Maps the vector of input values to a global bin index.
std::vector< std::vector< float > > m_binEdges
Vector of bin edges.
ClassDef(ECLChargedPIDPhasespaceBinning, 1)
ClassDef.
ECLChargedPIDPhasespaceBinning(const std::vector< std::vector< float > > binEdges)
Constructor.
Stores all required information for the ECLChargedPIDMVA for a phasespace category.
unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
Maps a charged stable pdg code to an index of the MVA response.
MVAResponseTransformMode m_mvaResponseTransformMode
Stores which transformation mode to apply to the mva responses.
void setSerialisedWeight(std::string weight)
Setter for serialised weightfile.
const std::string getSerialisedWeight() const
Getter for serialised weightfile.
void setCDFs(std::vector< std::unordered_map< unsigned int, TH1F > > cdfs)
Set the cdfs.
TParameter< float > m_max_possible_response_value
Max possible value of the mva response.
void setPDFs(std::vector< std::unordered_map< unsigned int, TF1 > > &pdfs)
Set the pdfs.
void setMaxPossibleResponseValue(const float &offset)
Set the max possible response value, used in log transformation of the responses.
ECLChargedPIDPhasespaceCategory()
Default constructor, necessary for ROOT to stream the object.
std::vector< std::unordered_map< unsigned int, TH1F > > m_cdfs
CDFs for each mva return value for each hypothesis.
MVAResponseTransformMode getTransformMode() const
Getter for the MVA transform mode.
void setDecorrelationMatrixMap(std::unordered_map< unsigned int, std::vector< float > > decorrelationMatrices)
Set the decorrelation matrices.
float getMaxPossibleResponseValue() const
Get the max possible response value, used in log transformation of the responses.
const TF1 * getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
Getter for pdfs.
ClassDef(ECLChargedPIDPhasespaceCategory, 1)
ClassDef.
std::unordered_map< unsigned int, std::vector< float > > m_decorrelationMatrices
Decorrelation matrices.
std::unordered_map< unsigned int, unsigned int > m_mvaIndexForHypothesis
Unordered map of abs(pdg_code) for the 6 charged stable hypotheses to index of the MVA response vecto...
void setTemperature(const float &temperature)
Set the temperature parameter used to calibrate the MVA.
ECLChargedPIDPhasespaceCategory(const std::string weightfilePath, const MVAResponseTransformMode &mvaResponeTransformMode, const std::unordered_map< unsigned int, unsigned int > &mvaIndexForHypothesis)
Useful constructor.
float getTemperature() const
Getter for the temperature.
TParameter< float > m_temperature
calibration factor for MVA responses.
const std::vector< float > * getDecorrelationMatrix(const int hypoPDG) const
Gets the decorrelation matrix for a given particle hypothesis.
std::vector< std::unordered_map< unsigned int, TF1 > > m_pdfs
A vector of unordered maps.
const TH1F * getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
Gets the cdf for the hypothesis pdg for a given response value.
TParameter< float > m_log_transform_offset
Small offset to avoid mva response values of 1.0 being log transformed to NaN.
std::string m_weight
Serialised MVA weightfile.
float getLogTransformOffset() const
Getter for the log transform offset.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
@ c_DirectMVAResponse
Directly take the MVA response as the logL, useful if we train neural nets to learn the logL.
@ c_DecorrelationTransform
Decorrelation transform of the gaussian transformed mva responses.
@ c_GaussianTransform
Gaussian transform of the log transformed mva response.
void setlogTransformOffset(const float &offset)
Set the offset used in the log transformation to be consistent with the offset used when generating t...
The Weightfile class serializes all information about a training into an xml tree.
Definition Weightfile.h:38
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
static Weightfile loadFromROOTFile(const std::string &filename)
Static function which loads a Weightfile from a ROOT file.
static void saveToStream(Weightfile &weightfile, std::ostream &stream)
Static function which serializes a Weightfile to a stream.
Abstract base class for different kinds of events.