Belle II Software  release-08-01-10
ECLChargedPIDMVAModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 /* Own header. */
10 #include <ecl/modules/eclChargedPIDMVA/ECLChargedPIDMVAModule.h>
11 
12 /* Basf2 headers. */
13 #include <analysis/VariableManager/Manager.h>
14 #include <analysis/dataobjects/Particle.h>
15 #include <framework/gearbox/Const.h>
16 #include <framework/logging/Logger.h>
17 #include <mdst/dataobjects/Track.h>
18 #include <mva/interface/Interface.h>
19 
20 /* ROOT headers. */
21 #include <TMath.h>
22 
23 /* C++ headers. */
24 #include <limits>
25 #include <string>
26 
27 using namespace Belle2;
29 
30 REG_MODULE(ECLChargedPIDMVA);
31 
33 {
34  setDescription("This module implements charged particle identification using ECL-related observables via a multiclass MVA. For each track a set of relevant ECL variables(shower shape, PSD etc.) are fed to the MVA method which is stored in a conditions database payload. The MVA output variables are then used to construct a likelihood from pdfs also stored in the payload. The likelihood is then stored in the ECLPidLikelihood object.");
36  addParam("payloadName",
38  "The name of the database payload object with the MVA weights.",
39  std::string("ECLChargedPidMVAWeights"));
40 }
41 
43 
45 {
46  m_eventMetaData.isRequired();
47 
48  m_mvaWeights = std::make_unique<DBObjPtr<ECLChargedPIDMVAWeights>>(m_payload_name);
49 
50  m_eclPidLikelihoods.registerInDataStore();
51  m_tracks.registerRelationTo(m_eclPidLikelihoods);
52 }
53 
55 {
56  if (!m_mvaWeights) {B2FATAL("No ECLChargedPidMVAWeights payload found in database!");}
57 }
58 
60 {
61  B2DEBUG(22, "Run: " << m_eventMetaData->getRun() <<
62  ". Loading supported MVA interfaces for multi-class charged particle identification.");
63 
65  m_binningVariables = manager.getVariables((*m_mvaWeights.get())->getBinningVariables());
66 
68  auto supported_interfaces = MVA::AbstractInterface::getSupportedInterfaces();
69 
70  for (const auto& iterator : * (*m_mvaWeights.get())->getPhasespaceCategories()) {
71 
72  B2DEBUG(22, "\t\tweightfile[" << iterator.first << "]");
73 
74  // De-serialize the string into an MVA::Weightfile object.
75  std::stringstream ss(iterator.second.getSerialisedWeight());
76  auto weightfile = MVA::Weightfile::loadFromStream(ss);
77 
78  MVA::GeneralOptions general_options;
79  weightfile.getOptions(general_options);
80 
81  // Store an MVA::Expert object.
82  m_experts[iterator.first] = supported_interfaces[general_options.m_method]->getExpert();
83  m_experts.at(iterator.first)->load(weightfile);
84 
85  B2DEBUG(22, "\t\tweightfile at " << iterator.first << " successfully initialised.");
86 
87  // Get the full version of the variable names.
88  // These are stored in the xml as an additional vector.
89  const std::string identifier = std::string("de_aliased_clf_vars");
90  auto clf_vars = weightfile.getVector<std::string>(identifier);
91  m_variables[iterator.first] = manager.getVariables(clf_vars);
92 
93  B2DEBUG(22, "\t\t Weightfile at " << iterator.first << " has " << m_variables[iterator.first].size() << " features.");
94 
95  std::vector<float> features(clf_vars.size(), 0.0);
96  std::vector<float> spectators;
97 
98  m_datasets[iterator.first] = std::make_unique<MVA::SingleDataset>(general_options, features, 1.0, spectators);
99  }
100 }
101 
103 {
104  (*m_mvaWeights.get()).addCallback([this]() { checkDBPayloads();});
105  checkDBPayloads();
106  (*m_mvaWeights.get()).addCallback([this]() { initializeMVA(); });
107  initializeMVA();
108 }
109 
111 {
112  for (const auto& track : m_tracks) {
113 
114  // Load the pion fit hypothesis or closest in mass.
115  const TrackFitResult* fitRes = track.getTrackFitResultWithClosestMass(Const::pion);
116  if (fitRes == nullptr) continue;
117 
118  // If a track omega value is very small trackFitResult might return a charge of 0.
119  // The particle is then not able to be assigned a pdg code causing a crash.
120  // Skip such tracks.
121  const int charge = fitRes->getChargeSign();
122  if (charge == 0) continue;
123 
124  // Create a particle object so that we can use the variable manager.
125  const Particle particle = Particle(&track, Const::pion);
126 
127  // Get global bin index for track corresponding to N dimensional binning.
128  std::vector<float> binningVariableValues(m_binningVariables.size());
129  for (unsigned int ivar(0); ivar < binningVariableValues.size(); ivar++) {
130  auto varobj = m_binningVariables.at(ivar);
131  binningVariableValues.at(ivar) = evaluateVariable(varobj, &particle);
132  }
133  const int linearCategoryIndex = (*m_mvaWeights.get())->getLinearisedCategoryIndex(binningVariableValues);
134 
135  // Require we cover the phasespace.
136  // Alternatively could take closest covered region but behaviour will not be well understood.
137  // After this check the linearCategoryIndex is guaranteed to be positive.
138  if (!(*m_mvaWeights.get())->isPhasespaceCovered(linearCategoryIndex)) continue;
139 
140  // Get the phasespaceCategory
141  const auto phasespaceCategory = (*m_mvaWeights.get())->getPhasespaceCategory(linearCategoryIndex);
142  const auto transformMode = phasespaceCategory->getTransformMode();
143 
144  // Fill the feature vectors
145  unsigned int nvars = m_variables.at(linearCategoryIndex).size();
146  for (unsigned int ivar(0); ivar < nvars; ++ivar) {
147  auto varobj = m_variables.at(linearCategoryIndex).at(ivar);
148  m_datasets.at(linearCategoryIndex)->m_input[ivar] = evaluateVariable(varobj, &particle);
149  }
150 
151  // Get the mva response to be converted to a likelihood
152  std::vector<float> scores = m_experts.at(linearCategoryIndex)->applyMulticlass(*m_datasets.at(linearCategoryIndex))[0];
153 
154  // Log transform the scores
155  // This turns the MVA output which, if using the nominal TMVA MulticlassBDT,
156  // is heavily peaked at 0 and 1 into a smooth curve.
157  // We can then evaluate the likelihoods from this curve directly or further transform the responses.
158  for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
159  if ((transformMode != transformModes::c_DirectMVAResponse) and
160  (transformMode != transformModes::c_LogMVAResponse)) {
161  scores[iResponse] = logTransformation(scores[iResponse],
162  phasespaceCategory->getLogTransformOffset(),
163  phasespaceCategory->getMaxPossibleResponseValue());
164  }
165  }
166 
167  B2DEBUG(22, "Scores: ");
168  for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
169  B2DEBUG(22, "\t\t " << iResponse << " : " << scores[iResponse]);
170  }
171 
172  float logLikelihoods[Const::ChargedStable::c_SetSize];
173 
174  // Order of loop is defined in UnitConst.cc: e, mu, pi, K, p, d
175  for (const auto& hypo : Const::chargedStableSet) {
176  unsigned int hypo_idx = hypo.getIndex();
177  auto absPdgId = abs(hypo.getPDGCode());
178 
179  // Copy the scores so they aren't transformed in place
180  std::vector<float> transformed_scores;
181  transformed_scores = scores;
182 
183  // Perform extra transformations if they are booked
184  if ((transformMode == transformModes::c_GaussianTransform) or
185  (transformMode == transformModes::c_DecorrelationTransform)) {
186 
187  // Gaussian transform
188  for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
189  transformed_scores[iResponse] = gaussTransformation(scores[iResponse], phasespaceCategory->getCDF(absPdgId, iResponse));
190  }
191  if (transformMode == transformModes::c_DecorrelationTransform) {
192  transformed_scores = decorrTransformation(transformed_scores, phasespaceCategory->getDecorrelationMatrix(absPdgId));
193  }
194  }
195 
196  // Get the pdf values for each response value
197  float logL = 0.0;
198  if ((transformMode == transformModes::c_DirectMVAResponse) or
199  (transformMode == transformModes::c_LogMVAResponse)) {
200 
201  logLikelihoods[hypo_idx] = scores[phasespaceCategory->getMVAIndexForHypothesis(absPdgId)];
202  if (transformMode == transformModes::c_LogMVAResponse) {
203  logLikelihoods[hypo_idx] = (std::isnormal(logLikelihoods[hypo_idx])
204  && logLikelihoods[hypo_idx] > 0) ? std::log(logLikelihoods[hypo_idx]) : c_dummyLogL;
205  }
206  logLikelihoods[hypo_idx] = logLikelihoods[hypo_idx] / phasespaceCategory->getTemperature();
207  continue;
208  }
209  B2DEBUG(22, "MVA Index for hypo " << absPdgId << " : " << phasespaceCategory->getMVAIndexForHypothesis(absPdgId));
210 
211  for (unsigned int iResponse = 0; iResponse < transformed_scores.size(); iResponse++) {
212  if ((transformMode == transformModes::c_LogTransformSingle) and
213  (phasespaceCategory->getMVAIndexForHypothesis(absPdgId) != iResponse)) {continue;}
214 
215  double xmin, xmax;
216  phasespaceCategory->getPDF(iResponse, absPdgId)->GetRange(xmin, xmax);
217  // Kick the response back into the range of the PDF (alternatively consider logL = std::numeric_limits<float>::min() if outside range)
218  float transformed_score_copy = transformed_scores[iResponse];
219  if (transformed_score_copy < xmin) transformed_score_copy = xmin + 1e-5;
220  if (transformed_score_copy > xmax) transformed_score_copy = xmax - 1e-5;
221 
222  float pdfval = phasespaceCategory->getPDF(iResponse, absPdgId)->Eval(transformed_score_copy);
223  // Don't take a log of inf or 0
224  logL += (std::isnormal(pdfval) && pdfval > 0) ? std::log(pdfval) : c_dummyLogL;
225  B2DEBUG(22, "MVA response index, MVA score, logL: " << iResponse << " " << transformed_score_copy << " " << logL << " \n");
226  }
227  logLikelihoods[hypo_idx] = logL;
228  } // hypo loop
229 
230  B2DEBUG(22, "logL: ");
231  for (unsigned int iLogL = 0; iLogL < Const::ChargedStable::c_SetSize; iLogL++) {
232  B2DEBUG(22, "\t\t " << iLogL << " : " << logLikelihoods[iLogL]);
233  }
234 
235  const auto eclPidLikelihood = m_eclPidLikelihoods.appendNew(logLikelihoods);
236 
237  track.addRelationTo(eclPidLikelihood);
238  } // tracks
239 }
240 
241 float ECLChargedPIDMVAModule::logTransformation(const float value, const float offset, const float max) const
242 {
243  return std::log((value + offset) / (max + offset - value));
244 }
245 
246 float ECLChargedPIDMVAModule::gaussTransformation(const float value, const TH1F* cdf) const
247 {
248  int binIndex = cdf->GetXaxis()->FindBin(value);
249 
250  double dx = cdf->GetBinWidth(binIndex);
251  double cumulant = cdf->GetBinContent(binIndex);
252  double dy;
253 
254  if (binIndex == cdf->GetNbinsX()) {
255  dy = 1 - cdf->GetBinContent(binIndex);
256  } else if (binIndex == 0) {
257  dy = cdf->GetBinContent(binIndex);
258  } else {
259  dy = cdf->GetBinContent(binIndex + 1) - cdf->GetBinContent(binIndex);
260  }
261 
262  cumulant = cumulant + (value - cdf->GetBinLowEdge(binIndex)) * dy / dx;
263 
264  cumulant = std::min(cumulant, 1.0 - 1e-9);
265  cumulant = std::max(cumulant, 1e-9);
266 
267  double maxErfInvArgRange = 0.99999999;
268  double arg = 2.0 * cumulant - 1.0;
269 
270  arg = std::min(maxErfInvArgRange, arg);
271  arg = std::max(-maxErfInvArgRange, arg);
272 
273  return c_sqrt2 * TMath::ErfInverse(arg);
274 }
275 
276 
277 std::vector<float> ECLChargedPIDMVAModule::decorrTransformation(const std::vector<float> scores,
278  const std::vector<float>* decorrelationMatrix) const
279 {
280  unsigned int n_scores = scores.size();
281  std::vector<float> decor_scores(n_scores, 0.0);
282 
283  for (unsigned int irow = 0; irow < n_scores; irow++) {
284  unsigned int offset = irow * n_scores;
285  for (unsigned int icol = 0; icol < n_scores; icol++) {
286  decor_scores[irow] += scores[irow] * decorrelationMatrix->at(icol + offset);
287  }
288  }
289  return decor_scores;
290 }
291 
293 {
294  float val = std::numeric_limits<float>::quiet_NaN();
295  if (std::holds_alternative<double>(varobj->function(particle))) {
296  val = (float)std::get<double>(varobj->function(particle));
297  } else if (std::holds_alternative<int>(varobj->function(particle))) {
298  val = (float)std::get<int>(varobj->function(particle));
299  } else if (std::holds_alternative<bool>(varobj->function(particle))) {
300  val = (float)std::get<bool>(varobj->function(particle));
301  } else {
302  B2ERROR("Variable '" << varobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
303  }
304  if (std::isnan(val)) {
305  //NaNs cause crashed if using TMVA MulticlassBDT.
306  //In case other MVA methods are used, NaNs likely should be masked properly also.
307  B2FATAL("Variable '" << varobj->name <<
308  "' has NaN entries. Variable definitions in the MVA payload should include 'ifNaNGiveX(X, DEFAULT)'" <<
309  "with a proper default value if it is possible for the variable to evaluate as NaN.");
310  }
311 
312  return val;
313 }
static const unsigned int c_SetSize
Number of elements (for use in array bounds etc.)
Definition: Const.h:606
static const ParticleSet chargedStableSet
set of charged stable particles
Definition: Const.h:609
static const ChargedStable pion
charged pion particle
Definition: Const.h:652
std::unordered_map< unsigned int, std::unique_ptr< MVA::Expert > > m_experts
unordered map of MVA experts.
std::vector< const Variable::Manager::Var * > m_binningVariables
Vector of variables used to define the regions in which the MVAs are trained.
void checkDBPayloads()
Check payloads for consistency.
std::vector< float > decorrTransformation(const std::vector< float > scores, const std::vector< float > *decorrelationMatrix) const
decorrelation transformation.
virtual void initialize() override
Use this to initialize resources or memory your module needs.
virtual void event() override
Called once for each event.
std::unordered_map< unsigned int, std::unique_ptr< MVA::SingleDataset > > m_datasets
MVA dataset to be passed to the expert.
static constexpr double c_sqrt2
Definition of sqrt(2)
StoreObjPtr< EventMetaData > m_eventMetaData
The event information.
void initializeMVA()
Initialise the multiclass MVAs.
virtual ~ECLChargedPIDMVAModule()
Destructor.
float evaluateVariable(const Variable::Manager::Var *varobj, const Particle *particle)
evaluate the variable for the particle and convert the return value float.
float logTransformation(const float value, const float offset, const float max) const
log transformation.
static constexpr float c_dummyLogL
Dummy value of log Likelihood for a particle hypothesis.
virtual void beginRun() override
Called once before a new run begins.
std::unordered_map< unsigned int, std::vector< const Variable::Manager::Var * > > m_variables
Map of vectors containing the variables objects to be fed to the MVA.
StoreArray< Track > m_tracks
array of track objects.
float gaussTransformation(const float value, const TH1F *cdf) const
gaussian transformation.
std::unique_ptr< DBObjPtr< ECLChargedPIDMVAWeights > > m_mvaWeights
Interface to DB payloads containing MVA weightfiles and pdfs.
StoreArray< ECLPidLikelihood > m_eclPidLikelihoods
Array of ECLPidLikelihood objects.
std::string m_payload_name
The name of the database payload object with the MVA weights.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition: Interface.h:53
static void initSupportedInterfaces()
Static function which initliazes all supported interfaces, has to be called once before getSupportedI...
Definition: Interface.cc:45
General options which are shared by all MVA trainings.
Definition: Options.h:62
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
Definition: Weightfile.cc:251
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
Class to store reconstructed particles.
Definition: Particle.h:75
Values of the result of a track fit with a given particle hypothesis.
Global list of available variables.
Definition: Manager.h:101
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
REG_MODULE(arichBtest)
Register the Module.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
Abstract base class for different kinds of events.
std::string name
Unique identifier of the function, used as key.
Definition: Manager.h:130
A variable returning a floating-point value for a given Particle.
Definition: Manager.h:146
FunctionPtr function
Pointer to function.
Definition: Manager.h:147