Belle II Software  release-08-01-10
ECLChargedPIDMVAWeights.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 // FRAMEWORK
12 #include <framework/logging/Logger.h>
13 
14 // MVA
15 #include <mva/interface/Weightfile.h>
16 
17 // ROOT
18 #include <TObject.h>
19 #include <TF1.h>
20 #include <TH1.h>
21 #include <TParameter.h>
22 
23 //BOOST
24 #include <boost/algorithm/string/predicate.hpp>
25 
26 // C++
27 #include <unordered_map>
28 #include <algorithm>
29 
30 namespace Belle2 {
39  class ECLChargedPIDPhasespaceBinning : public TObject {
40 
41  public:
42 
47 
52  ECLChargedPIDPhasespaceBinning(const std::vector<std::vector<float>> binEdges)
53  {
54  m_binEdges = binEdges;
55  for (auto dimensionBinEdges : binEdges) {
56  m_nBins.push_back(dimensionBinEdges.size() - 1);
57  }
58  }
59 
64 
69  int getLinearisedBinIndex(const std::vector<float> values)
70  {
71  int globalBin;
72  std::vector<int> binIndices = getBinIndices(values);
73  for (unsigned int i = 0; i < binIndices.size(); i++) {
74  if (binIndices[i] < 0) return -1;
75  if (i == 0) {
76  globalBin = binIndices[i];
77  } else {
78  globalBin = globalBin * m_nBins[i] + binIndices[i];
79  }
80  }
81  return globalBin;
82  }
83 
84  private:
85 
91  std::vector<int> getBinIndices(const std::vector<float> values)
92  {
93  std::vector<int> binIndices(m_binEdges.size());
94 
95  for (unsigned int i = 0; i < m_binEdges.size(); i++) {
96  std::vector<float> dimBinEdges = m_binEdges[i];
97  auto it = std::upper_bound(dimBinEdges.begin(), dimBinEdges.end(), values[i]);
98  if (it == dimBinEdges.end()) {
99  binIndices[i] = -1;
100  } else {
101  int index = std::distance(dimBinEdges.begin(), it) - 1;
102  binIndices[i] = index;
103  }
104  }
105  return binIndices;
106  }
107 
111  std::vector<std::vector<float>> m_binEdges;
112 
116  std::vector<int> m_nBins;
117 
118  // 1: First class implementation.
120  };
121 
132  class ECLChargedPIDPhasespaceCategory : public TObject {
133 
134  public:
136  enum class MVAResponseTransformMode : unsigned int {
138  c_LogTransform = 0,
148  c_LogMVAResponse = 5
149  };
150 
155  m_log_transform_offset("logTransformOffset", 1e-15),
156  m_max_possible_response_value("maxPossibleResponseValue", 1.0),
157  m_temperature("temperature", 1.0)
158  {};
159 
167  ECLChargedPIDPhasespaceCategory(const std::string weightfilePath,
168  const MVAResponseTransformMode& mvaResponeTransformMode,
169  const std::unordered_map<unsigned int, unsigned int>& mvaIndexForHypothesis) :
170  m_log_transform_offset("logTransformOffset", 1e-15),
171  m_max_possible_response_value("maxPossibleResponseValue", 1.0),
172  m_temperature("temperature", 1.0)
173  {
174  // Load and serialize the MVA::Weightfile object into a string for storage in the database,
175  // otherwise there are issues w/ dictionary generation for the payload class...
176  Belle2::MVA::Weightfile weightfile;
177  if (boost::ends_with(weightfilePath, ".root")) {
178  weightfile = Belle2::MVA::Weightfile::loadFromROOTFile(weightfilePath);
179  } else if (boost::ends_with(weightfilePath, ".xml")) {
180  weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
181  } else {
182  B2WARNING("Unkown file extension for file: " << weightfilePath << ", fallback to xml...");
183  weightfile = Belle2::MVA::Weightfile::loadFromXMLFile(weightfilePath);
184  }
185  std::stringstream ss;
187 
188  // store
189  m_weight = ss.str();
190  m_mvaResponseTransformMode = mvaResponeTransformMode;
191  m_mvaIndexForHypothesis = mvaIndexForHypothesis;
192  }
193 
198 
202  const std::string getSerialisedWeight() const {return m_weight;}
203 
208 
214  const TF1* getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
215  {
216  return &m_pdfs.at(iMVAResponse).at(hypoPDG);
217  }
218 
224  const TH1F* getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
225  {
226  return &m_cdfs.at(iMVAResponse).at(hypoPDG);
227  }
228 
233  const std::vector<float>* getDecorrelationMatrix(const int hypoPDG) const
234  {
235  return &m_decorrelationMatrices.at(hypoPDG);
236  }
237 
242  void setCDFs(std::vector<std::unordered_map<unsigned int, TH1F>> cdfs) {m_cdfs = cdfs;}
243 
244 
249  void setPDFs(std::vector<std::unordered_map<unsigned int, TF1>>& pdfs) {m_pdfs = pdfs;}
250 
251 
256  void setDecorrelationMatrixMap(std::unordered_map<unsigned int, std::vector<float>> decorrelationMatrices)
257  {
258  m_decorrelationMatrices = decorrelationMatrices;
259  }
260 
264  void setlogTransformOffset(const float& offset)
265  {
266  m_log_transform_offset.SetVal(offset);
267  }
268 
272  float getLogTransformOffset() const
273  {
274  return m_log_transform_offset.GetVal();
275  }
276 
280  void setTemperature(const float& temperature)
281  {
282  m_temperature.SetVal(temperature);
283  }
284 
288  float getTemperature() const
289  {
290  return m_temperature.GetVal();
291  }
292 
296  void setMaxPossibleResponseValue(const float& offset)
297  {
298  m_max_possible_response_value.SetVal(offset);
299  }
300 
305  {
306  return m_max_possible_response_value.GetVal();
307  }
308 
315  unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
316  {
317  return m_mvaIndexForHypothesis.at(hypoPDG);
318  }
319 
320  private:
321 
322  TParameter<float> m_log_transform_offset;
323  TParameter<float> m_max_possible_response_value;
324  TParameter<float> m_temperature;
329  std::string m_weight;
330 
335 
340  std::vector<std::unordered_map<unsigned int, TF1>> m_pdfs;
341 
346  std::unordered_map<unsigned int, unsigned int> m_mvaIndexForHypothesis;
347 
353  std::vector<std::unordered_map<unsigned int, TH1F>> m_cdfs;
354 
359  std::unordered_map<unsigned int, std::vector<float>> m_decorrelationMatrices;
360 
361  // 1: First class implementation.
363  };
364 
369  class ECLChargedPIDMVAWeights : public TObject {
370  public:
375 
380 
381 
388 
395  void storeMVAWeights(std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>& phasespaceCategories)
396  {
397  m_phasespaceCategories = phasespaceCategories;
398  }
399 
404  const ECLChargedPIDPhasespaceCategory* getPhasespaceCategory(const unsigned int idx) const {return &m_phasespaceCategories.at(idx);}
405 
409  const std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory>* getPhasespaceCategories() const {return &m_phasespaceCategories;}
410 
415  bool isPhasespaceCovered(const int linearBinIndex) const
416  {
417  // if the vector of values passed falls outside the defined phasespace.
418  if (linearBinIndex < 0) return false;
419  // if the vector is within the defined phasespace but we do not provide an ECLChargedPIDPhasespaceCategory object for this bin.
420  if (m_phasespaceCategories.count(linearBinIndex) == 0) return false;
421  return true;
422  }
423 
429  unsigned int getLinearisedCategoryIndex(std::vector<float> values) const
430  {
431  if (!m_categories) {
432  B2FATAL("No N dimensional grid was found in the ECLChargedPIDMVA DB payload. This should not happen! Abort...");
433  }
434  return m_categories->getLinearisedBinIndex(values);
435  }
436 
440  std::vector<std::string> getBinningVariables() const {return m_binningVariables;}
441 
446  void setBinningVariables(std::vector<std::string>& binningVariables) {m_binningVariables = binningVariables;}
447  private:
453 
457  std::unordered_map<unsigned int, ECLChargedPIDPhasespaceCategory> m_phasespaceCategories;
458 
462  std::vector<std::string> m_binningVariables;
463 
464  // 1: First class implementation.
467  }; // class ECLChargedPIDMVAWeights
469 } // Belle 2 Namespace
Class to contain payload of everything needed for MVA based charged particle identification.
const ECLChargedPIDPhasespaceCategory * getPhasespaceCategory(const unsigned int idx) const
Returns the ith ECLChargedPIDPhasespaceCategory.
ECLChargedPIDMVAWeights()
Default constructor, necessary for ROOT to stream the object.
void setBinningVariables(std::vector< std::string > &binningVariables)
Set string definitions of the variables used in defining the phasespace categories.
const std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > * getPhasespaceCategories() const
Returns the map of phasespaceCategories.
std::vector< std::string > m_binningVariables
Stores the list of variables used to define the phasespace binning.
std::vector< std::string > getBinningVariables() const
Returns string definitions of the variables used in defining the phasespace categories.
bool isPhasespaceCovered(const int linearBinIndex) const
Returns bool whether or not the given values are within the phasespace covered by the trainings in th...
ECLChargedPIDPhasespaceBinning * m_categories
An N Dimensional binning whose bins define the boundaries of the categories for which the training is...
unsigned int getLinearisedCategoryIndex(std::vector< float > values) const
Returns the flattened 1D index of the N dimensional phasespace category grid.
void setWeightCategories(ECLChargedPIDPhasespaceBinning *h)
Set the N dimensional grid representing the categories for which weightfiles are defined.
ClassDef(ECLChargedPIDMVAWeights, 1)
ClassDef
std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > m_phasespaceCategories
Stores the ECLChargedPIDPhasespaceCategory object for all the N dimensional categories.
void storeMVAWeights(std::unordered_map< unsigned int, ECLChargedPIDPhasespaceCategory > &phasespaceCategories)
Store the ECLChargedPIDPhasespaceCategory objects into the payload.
Class to store the N dimensional phasespace binning of the MVA categorical training.
std::vector< int > m_nBins
Vector of number of bins per dimension.
std::vector< int > getBinIndices(const std::vector< float > values)
Maps the vector of input values to their bin index in N dimensions.
int getLinearisedBinIndex(const std::vector< float > values)
Maps the vector of input values to a global bin index.
std::vector< std::vector< float > > m_binEdges
Vector of bin edges.
ClassDef(ECLChargedPIDPhasespaceBinning, 1)
ClassDef.
ECLChargedPIDPhasespaceBinning(const std::vector< std::vector< float >> binEdges)
Constructor.
Stores all required information for the ECLChargedPIDMVA for a phasespace category.
unsigned int getMVAIndexForHypothesis(const unsigned int hypoPDG) const
Maps a charged stable pdg code to an index of the MVA response.
MVAResponseTransformMode m_mvaResponseTransformMode
Stores which transformation mode to apply to the mva responses.
const std::string getSerialisedWeight() const
Getter for serialised weightfile.
void setCDFs(std::vector< std::unordered_map< unsigned int, TH1F >> cdfs)
Set the cdfs.
TParameter< float > m_max_possible_response_value
Max possible value of the mva response.
const std::vector< float > * getDecorrelationMatrix(const int hypoPDG) const
Gets the decorrelation matrix for a given particle hypothesis.
void setMaxPossibleResponseValue(const float &offset)
Set the max possible response value, used in log transformation of the responses.
ECLChargedPIDPhasespaceCategory()
Default constructor, necessary for ROOT to stream the object.
std::vector< std::unordered_map< unsigned int, TH1F > > m_cdfs
CDFs for each mva return value for each hypothesis.
MVAResponseTransformMode getTransformMode() const
Getter for the MVA transform mode.
float getMaxPossibleResponseValue() const
Get the max possible response value, used in log transformation of the responses.
ClassDef(ECLChargedPIDPhasespaceCategory, 1)
ClassDef.
void setDecorrelationMatrixMap(std::unordered_map< unsigned int, std::vector< float >> decorrelationMatrices)
Set the decorrelation matrices.
std::unordered_map< unsigned int, std::vector< float > > m_decorrelationMatrices
Decorrelation matrices.
const TH1F * getCDF(const unsigned int iMVAResponse, const int hypoPDG) const
Gets the cdf for the hypothesis pdg for a given response value.
std::unordered_map< unsigned int, unsigned int > m_mvaIndexForHypothesis
Unordered map of abs(pdg_code) for the 6 charged stable hypotheses to index of the MVA response vecto...
void setTemperature(const float &temperature)
Set the temperature parameter used to calibrate the MVA.
void setPDFs(std::vector< std::unordered_map< unsigned int, TF1 >> &pdfs)
Set the pdfs.
ECLChargedPIDPhasespaceCategory(const std::string weightfilePath, const MVAResponseTransformMode &mvaResponeTransformMode, const std::unordered_map< unsigned int, unsigned int > &mvaIndexForHypothesis)
Useful constructor.
float getTemperature() const
Getter for the temperature.
TParameter< float > m_temperature
calibration factor for MVA responses.
std::vector< std::unordered_map< unsigned int, TF1 > > m_pdfs
A vector of unodered maps.
TParameter< float > m_log_transform_offset
Small offset to avoid mva response values of 1.0 being log transformed to NaN.
std::string m_weight
Serialsed MVA weightfile.
float getLogTransformOffset() const
Getter for the log transform offset.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
@ c_DirectMVAResponse
Directly take the MVA response as the logL, useful if we train neural nets to learn the logL.
@ c_DecorrelationTransform
Decorrelation transform of the gaussian transformed mva responses.
@ c_GaussianTransform
Gaussian transform of the log transformed mva response.
const TF1 * getPDF(const unsigned int iMVAResponse, const unsigned int hypoPDG) const
Getter for pdfs.
void setlogTransformOffset(const float &offset)
Set the offset used in the log transformation to be consistent with the offset used when generating t...
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
Definition: Weightfile.cc:240
static Weightfile loadFromROOTFile(const std::string &filename)
Static function which loads a Weightfile from a ROOT file.
Definition: Weightfile.cc:217
static void saveToStream(Weightfile &weightfile, std::ostream &stream)
Static function which serializes a Weightfile to a stream.
Definition: Weightfile.cc:185
Abstract base class for different kinds of events.