Belle II Software development
ECLChargedPIDMVAWeights.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10
// FRAMEWORK
#include <framework/logging/Logger.h>

// MVA
#include <mva/interface/Weightfile.h>

// ROOT
#include <TObject.h>
#include <TF1.h>
#include <TH1.h>
#include <TParameter.h>

// C++
#include <algorithm>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
26
27namespace Belle2 {
32
36 class ECLChargedPIDPhasespaceBinning : public TObject {
37
38 public:
39
44
49 ECLChargedPIDPhasespaceBinning(const std::vector<std::vector<float>> binEdges)
50 {
51 m_binEdges = binEdges;
52 for (auto dimensionBinEdges : binEdges) {
53 m_nBins.push_back(dimensionBinEdges.size() - 1);
54 }
55 }
56
61
66 int getLinearisedBinIndex(const std::vector<float> values)
67 {
68 int globalBin(-1);
69 std::vector<int> binIndices = getBinIndices(values);
70 for (unsigned int i = 0; i < binIndices.size(); i++) {
71 if (binIndices[i] < 0) return -1;
72 if (i == 0) {
73 globalBin = binIndices[i];
74 } else {
75 globalBin = globalBin * m_nBins[i] + binIndices[i];
76 }
77 }
78 return globalBin;
79 }
80
81 private:
82
88 std::vector<int> getBinIndices(const std::vector<float> values)
89 {
90 std::vector<int> binIndices(m_binEdges.size());
91
92 for (unsigned int i = 0; i < m_binEdges.size(); i++) {
93 std::vector<float> dimBinEdges = m_binEdges[i];
94 auto it = std::upper_bound(dimBinEdges.begin(), dimBinEdges.end(), values[i]);
95 if (it == dimBinEdges.end()) {
96 binIndices[i] = -1;
97 } else {
98 int index = std::distance(dimBinEdges.begin(), it) - 1;
99 binIndices[i] = index;
100 }
101 }
102 return binIndices;
103 }
104
108 std::vector<std::vector<float>> m_binEdges;
109
113 std::vector<int> m_nBins;
114
115 // 1: First class implementation.
117 };
118
129 class ECLChargedPIDPhasespaceCategory : public TObject {
130
131 public:
147
152 m_log_transform_offset("logTransformOffset", 1e-15),
153 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
154 m_temperature("temperature", 1.0)
155 {};
156
164 ECLChargedPIDPhasespaceCategory(const std::string weightfilePath,
165 const MVAResponseTransformMode& mvaResponeTransformMode,
166 const std::unordered_map<unsigned int, unsigned int>& mvaIndexForHypothesis) :
167 m_log_transform_offset("logTransformOffset", 1e-15),
168 m_max_possible_response_value("maxPossibleResponseValue", 1.0),
169 m_temperature("temperature", 1.0)
170 {
171 // Load and serialize the MVA::Weightfile object into a string for storage in the database,
172 // otherwise there are issues w/ dictionary generation for the payload class...
173 Belle2::MVA::Weightfile weightfile;
174 if (weightfilePath.ends_with(".root")) {
175 weightfile = Belle2::MVA::Weightfile::loadFromROOTFile(weightfilePath);
176 } else if (weightfilePath.ends_with(".xml")) {
177 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
178 } else {
179 B2WARNING("Unknown file extension for file: " << weightfilePath << ", fallback to xml...");
180 weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
181 }
182 std::stringstream ss;
184
185 // store
186 m_weight = ss.str();
187 m_mvaResponseTransformMode = mvaResponeTransformMode;
188 m_mvaIndexForHypothesis = mvaIndexForHypothesis;
189 }
190
195
199 const std::string getSerialisedWeight() const {return m_weight;}
200
204 void setSerialisedWeight(std::string weight) { m_weight = weight; }
205
210
216 const TF1* getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
217 {
218 return &m_pdfs.at(iMVAResponse).at(hypoPDG);
219 }
220
226 const TH1F* getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
227 {
228 return &m_cdfs.at(iMVAResponse).at(hypoPDG);
229 }
230
235 const std::vector<float>* getDecorrelationMatrix(const int hypoPDG) const
236 {
237 return &m_decorrelationMatrices.at(hypoPDG);
238 }
239
244 void setCDFs(std::vector<std::unordered_map<unsigned int, TH1F>> cdfs) {m_cdfs = cdfs;}
245
246
251 void setPDFs(std::vector<std::unordered_map<unsigned int, TF1>>& pdfs) {m_pdfs = pdfs;}
252
253
258 void setDecorrelationMatrixMap(std::unordered_map<unsigned int, std::vector<float>> decorrelationMatrices)
259 {
260 m_decorrelationMatrices = decorrelationMatrices;
261 }
262
266 void setlogTransformOffset(const float& offset)
267 {
268 m_log_transform_offset.SetVal(offset);
269 }
270
275 {
276 return m_log_transform_offset.GetVal();
277 }
278
282 void setTemperature(const float& temperature)
283 {
284 m_temperature.SetVal(temperature);
285 }
286
290 float getTemperature() const
291 {
292 return m_temperature.GetVal();
293 }
294
298 void setMaxPossibleResponseValue(const float& offset)
299 {
300 m_max_possible_response_value.SetVal(offset);
301 }
302
307 {
308 return m_max_possible_response_value.GetVal();
309 }
310
317 unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
318 {
319 return m_mvaIndexForHypothesis.at(hypoPDG);
320 }
321
322 private:
323
324 TParameter<float> m_log_transform_offset;
326 TParameter<float> m_temperature;
327
331 std::string m_weight;
332
337
342 std::vector<std::unordered_map<unsigned int, TF1>> m_pdfs;
343
348 std::unordered_map<unsigned int, unsigned int> m_mvaIndexForHypothesis;
349
355 std::vector<std::unordered_map<unsigned int, TH1F>> m_cdfs;
356
361 std::unordered_map<unsigned int, std::vector<float>> m_decorrelationMatrices;
362
363 // 1: First class implementation.
365 };
366
371 class ECLChargedPIDMVAWeights : public TObject {
372 public:
377
382
383
390
397 void storeMVAWeights(std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>& phasespaceCategories)
398 {
399 m_phasespaceCategories = phasespaceCategories;
400 }
401
406 const ECLChargedPIDPhasespaceCategory* getPhasespaceCategory(const unsigned int idx) const {return &m_phasespaceCategories.at(idx);}
407
411 const std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>* getPhasespaceCategories() const {return &m_phasespaceCategories;}
412
417 bool isPhasespaceCovered(const int linearBinIndex) const
418 {
419 // if the vector of values passed falls outside the defined phasespace.
420 if (linearBinIndex < 0) return false;
421 // if the vector is within the defined phasespace but we do not provide an ECLChargedPIDPhasespaceCategory object for this bin.
422 if (m_phasespaceCategories.count(linearBinIndex) == 0) return false;
423 return true;
424 }
425
430
431 unsigned int getLinearisedCategoryIndex(std::vector<float> values) const
432 {
433 if (!m_categories) {
434 B2FATAL("No N dimensional grid was found in the ECLChargedPIDMVA DB payload. This should not happen! Abort...");
435 }
436 return m_categories->getLinearisedBinIndex(values);
437 }
438
442 std::vector<std::string> getBinningVariables() const {return m_binningVariables;}
443
448 void setBinningVariables(std::vector<std::string>& binningVariables) {m_binningVariables = binningVariables;}
449 private:
455
459 std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory> m_phasespaceCategories;
460
464 std::vector<std::string> m_binningVariables;
465
466 // 1: First class implementation.
468
469 }; // class ECLChargedPIDMVAWeights
470
471} // Belle 2 Namespace
const ECLChargedPIDPhasespaceCategory * getPhasespaceCategory(const unsigned int idx) const
Returns the ith ECLChargedPIDPhasespaceCategory.
ECLChargedPIDMVAWeights()
Default constructor, necessary for ROOT to stream the object.
void setBinningVariables(std::vector< std::string > &binningVariables)
Set string definitions of the variables used in defining the phasespace categories.
std::vector< std::string > m_binningVariables
Stores the list of variables used to define the phasespace binning.
bool isPhasespaceCovered(const int linearBinIndex) const
Returns bool whether or not the given values are within the phasespace covered by the trainings in th...
ECLChargedPIDPhasespaceBinning * m_categories
An N Dimensional binning whose bins define the boundaries of the categories for which the training is...
const std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > * getPhasespaceCategories() const
Returns the map of phasespaceCategories.
std::vector< std::string > getBinningVariables() const
Returns string definitions of the variables used in defining the phasespace categories.
unsigned int getLinearisedCategoryIndex(std::vector< float > values) const
Returns the flattened 1D index of the N dimensional phasespace category grid.
void setWeightCategories(ECLChargedPIDPhasespaceBinning *h)
Set the N dimensional grid representing the categories for which weightfiles are defined.
ClassDef(ECLChargedPIDMVAWeights, 1)
ClassDef.
std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > m_phasespaceCategories
Stores the ECLChargedPIDPhasespaceCategory object for all the N dimensional categories.
void storeMVAWeights(std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > &phasespaceCategories)
Store the ECLChargedPIDPhasespaceCategory objects into the payload.
Class to store the N dimensional phasespace binning of the MVA categorical training.
std::vector< int > m_nBins
Vector of number of bins per dimension.
std::vector< int > getBinIndices(const std::vector< float > values)
Maps the vector of input values to their bin index in N dimensions.
int getLinearisedBinIndex(const std::vector< float > values)
Maps the vector of input values to a global bin index.
std::vector< std::vector< float > > m_binEdges
Vector of bin edges.
ClassDef(ECLChargedPIDPhasespaceBinning, 1)
ClassDef.
ECLChargedPIDPhasespaceBinning(const std::vector< std::vector< float > > binEdges)
Constructor.
Stores all required information for the ECLChargedPIDMVA for a phasespace category.
unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
Maps a charged stable pdg code to an index of the MVA response.
MVAResponseTransformMode m_mvaResponseTransformMode
Stores which transformation mode to apply to the mva responses.
void setSerialisedWeight(std::string weight)
Setter for serialised weightfile.
const std::string getSerialisedWeight() const
Getter for serialised weightfile.
void setCDFs(std::vector< std::unordered_map< unsigned int, TH1F > > cdfs)
Set the cdfs.
TParameter< float > m_max_possible_response_value
Max possible value of the mva response.
void setPDFs(std::vector< std::unordered_map< unsigned int, TF1 > > &pdfs)
Set the pdfs.
void setMaxPossibleResponseValue(const float &offset)
Set the max possible response value, used in log transformation of the responses.
ECLChargedPIDPhasespaceCategory()
Default constructor, necessary for ROOT to stream the object.
std::vector< std::unordered_map< unsigned int, TH1F > > m_cdfs
CDFs for each mva return value for each hypothesis.
MVAResponseTransformMode getTransformMode() const
Getter for the MVA transform mode.
void setDecorrelationMatrixMap(std::unordered_map< unsigned int, std::vector< float > > decorrelationMatrices)
Set the decorrelation matrices.
float getMaxPossibleResponseValue() const
Get the max possible response value, used in log transformation of the responses.
const TF1 * getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
Getter for pdfs.
ClassDef(ECLChargedPIDPhasespaceCategory, 1)
ClassDef.
std::unordered_map< unsigned int, std::vector< float > > m_decorrelationMatrices
Decorrelation matrices.
std::unordered_map< unsigned int, unsigned int > m_mvaIndexForHypothesis
Unordered map of abs(pdg_code) for the 6 charged stable hypotheses to index of the MVA response vecto...
void setTemperature(const float &temperature)
Set the temperature parameter used to calibrate the MVA.
ECLChargedPIDPhasespaceCategory(const std::string weightfilePath, const MVAResponseTransformMode &mvaResponeTransformMode, const std::unordered_map< unsigned int, unsigned int > &mvaIndexForHypothesis)
Useful constructor.
float getTemperature() const
Getter for the temperature.
TParameter< float > m_temperature
calibration factor for MVA responses.
const std::vector< float > * getDecorrelationMatrix(const int hypoPDG) const
Gets the decorrelation matrix for a given particle hypothesis.
std::vector< std::unordered_map< unsigned int, TF1 > > m_pdfs
A vector of unordered maps.
const TH1F * getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
Gets the cdf for the hypothesis pdg for a given response value.
TParameter< float > m_log_transform_offset
Small offset to avoid mva response values of 1.0 being log transformed to NaN.
std::string m_weight
Serialised MVA weightfile.
float getLogTransformOffset() const
Getter for the log transform offset.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
@ c_DirectMVAResponse
Directly take the MVA response as the logL, useful if we train neural nets to learn the logL.
@ c_DecorrelationTransform
Decorrelation transform of the gaussian transformed mva responses.
@ c_GaussianTransform
Gaussian transform of the log transformed mva response.
void setlogTransformOffset(const float &offset)
Set the offset used in the log transformation to be consistent with the offset used when generating t...
The Weightfile class serializes all information about a training into an xml tree.
Definition Weightfile.h:38
static void saveToStream(Weightfile &weightfile, std::ostream &stream)
Static function which serializes a Weightfile to a stream.
Abstract base class for different kinds of events.