Belle II Software  light-2205-abys
ChargedPidMVAModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 //THIS MODULE
9 #include <analysis/modules/ChargedParticleIdentificator/ChargedPidMVAModule.h>
10 
11 //ANALYSIS
12 #include <mva/interface/Interface.h>
13 #include <analysis/VariableManager/Utility.h>
14 #include <analysis/dataobjects/Particle.h>
15 #include <analysis/dataobjects/ParticleList.h>
16 
17 //MDST
18 #include <mdst/dataobjects/ECLCluster.h>
19 
20 using namespace Belle2;
21 
22 REG_MODULE(ChargedPidMVA);
23 
25 {
26  setDescription("This module evaluates the response of an MVA trained for binary charged particle identification between two hypotheses, S and B. For a given input set of (S,B) mass hypotheses, it takes the Particle objects in the appropriate charged stable particle's ParticleLists, calculates the MVA score using the appropriate xml weight file, and adds it as ExtraInfo to the Particle objects.");
27 
28  setPropertyFlags(c_ParallelProcessingCertified);
29 
30  addParam("sigHypoPDGCode",
31  m_sig_pdg,
32  "The input signal mass hypothesis' pdgId.",
33  int(0));
34  addParam("bkgHypoPDGCode",
35  m_bkg_pdg,
36  "The input background mass hypothesis' pdgId.",
37  int(0));
38  addParam("particleLists",
39  m_decayStrings,
40  "The input list of decay strings, where the mother particle string should correspond to a full name of a particle list. One can select to run on daughters instead of mother particle, e.g. ['Lambda0 -> ^p+ ^pi-'].",
41  std::vector<std::string>());
42  addParam("payloadName",
43  m_payload_name,
44  "The name of the database payload object with the MVA weights.",
45  std::string("ChargedPidMVAWeights"));
46  addParam("chargeIndependent",
47  m_charge_independent,
48  "Specify whether to use a charge-independent training of the MVA.",
49  bool(false));
50  addParam("useECLOnlyTraining",
51  m_ecl_only,
52  "Specify whether to use an ECL-only training of the MVA.",
53  bool(false));
54 }
55 
56 
57 ChargedPidMVAModule::~ChargedPidMVAModule() = default;
58 
59 
60 void ChargedPidMVAModule::initialize()
61 {
62 
63  m_event_metadata.isRequired();
64 
65  m_weightfiles_representation = std::make_unique<DBObjPtr<ChargedPidMVAWeights>>(m_payload_name);
66 }
67 
68 
70 {
71 
72  // Retrieve the payload from the DB.
73  (*m_weightfiles_representation.get()).addCallback([this]() { initializeMVA(); });
74  initializeMVA();
75 
76  if (!(*m_weightfiles_representation.get())->isValidPdg(m_sig_pdg)) {
77  B2FATAL("PDG: " << m_sig_pdg <<
78  " of the signal mass hypothesis is not that of a valid particle in Const::chargedStableSet! Aborting...");
79  }
80  if (!(*m_weightfiles_representation.get())->isValidPdg(m_bkg_pdg)) {
81  B2FATAL("PDG: " << m_bkg_pdg <<
82  " of the background mass hypothesis is not that of a valid particle in Const::chargedStableSet! Aborting...");
83  }
84 
85  m_score_varname = "pidPairChargedBDTScore_" + std::to_string(m_sig_pdg) + "_VS_" + std::to_string(m_bkg_pdg);
86 
87  if (m_ecl_only) {
88  m_score_varname += "_" + std::to_string(Const::ECL);
89  } else {
90  for (size_t iDet(0); iDet < Const::PIDDetectors::set().size(); ++iDet) {
91  m_score_varname += "_" + std::to_string(Const::PIDDetectors::set()[iDet]);
92  }
93  }
94 }
95 
96 
98 {
99 
100  B2DEBUG(11, "EVENT: " << m_event_metadata->getEvent());
101  for (auto decayString : m_decayStrings) {
102  DecayDescriptor decayDescriptor;
103  decayDescriptor.init(decayString);
104  auto pl_name = decayDescriptor.getMother()->getFullName();
105  unsigned short m_nSelectedDaughters = decayDescriptor.getSelectionNames().size();
106  StoreObjPtr<ParticleList> pList(pl_name);
107  if (!pList) { B2FATAL("ParticleList: " << pl_name << " could not be found. Aborting..."); }
108 
109  // Need to get an absolute value in order to check if in Const::ChargedStable.
110  std::vector<int> pdgs;
111  if (m_nSelectedDaughters == 0)
112  pdgs.push_back(pList->getPDGCode());
113  else
114  pdgs = decayDescriptor.getSelectionPDGCodes();
115  for (auto pdg : pdgs) {
116  // Check if this ParticleList is made up of legit Const::ChargedStable particles.
117  if (!(*m_weightfiles_representation.get())->isValidPdg(abs(pdg))) {
118  B2FATAL("PDG: " << pdg << " of ParticleList: " << pl_name <<
119  " is not that of a valid particle in Const::chargedStableSet! Aborting...");
120  }
121  }
122 
123  B2DEBUG(11, "ParticleList: " << pList->getParticleListName() << " - N = " << pList->getListSize() << " particles.");
124  const auto nTargetParticles = (m_nSelectedDaughters == 0) ? pList->getListSize() : pList->getListSize() *
125  m_nSelectedDaughters;
126  std::vector<const Particle*> targetParticles;
127  if (m_nSelectedDaughters > 0) {
128  for (unsigned int iPart(0); iPart < pList->getListSize(); ++iPart) {
129  auto* iParticle = pList->getParticle(iPart);
130  auto daughters = decayDescriptor.getSelectionParticles(iParticle);
131  for (auto* iDaughter : daughters) {
132  targetParticles.push_back(iDaughter);
133  }
134  }
135  }
136  for (unsigned int ipart(0); ipart < nTargetParticles; ++ipart) {
137 
138  const Particle* particle = (m_nSelectedDaughters == 0) ? pList->getParticle(ipart) : targetParticles[ipart];
139  B2DEBUG(11, "\tParticle [" << ipart << "]");
140 
141  // Check that the particle has a valid relation set between track and ECL cluster.
142  // Otherwise, skip to next.
143  const ECLCluster* eclCluster = particle->getECLCluster();
144  if (!eclCluster) {
145  B2DEBUG(11, "\t\tParticle has invalid Track-ECLCluster relation, skip MVA application...");
146  continue;
147  }
148 
149  // Retrieve the index for the correct MVA expert and dataset,
150  // given reconstructed (clusterTheta, p, charge)
151  auto clusterTheta = eclCluster->getTheta();
152  auto p = particle->getP();
153  // Set a dummy charge of zero to pick charge-independent payloads, if requested.
154  auto charge = (!m_charge_independent) ? particle->getCharge() : 0.0;
155  int idx_theta, idx_p, idx_charge;
156  auto index = (*m_weightfiles_representation.get())->getMVAWeightIdx(clusterTheta, p, charge, idx_theta, idx_p, idx_charge);
157 
158  // Get the cut defining the MVA category under exam (this reflects the one used in the training).
159  const auto cuts = (*m_weightfiles_representation.get())->getCuts(m_sig_pdg);
160  const auto cutstr = (!cuts->empty()) ? cuts->at(index) : "";
161 
162  B2DEBUG(11, "\t\tclusterTheta = " << clusterTheta << " [rad]");
163  B2DEBUG(11, "\t\tp = " << p << " [GeV/c]");
164  if (!m_charge_independent) {
165  B2DEBUG(11, "\t\tcharge = " << charge);
166  }
167  B2DEBUG(11, "\t\tBrems corrected = " << particle->hasExtraInfo("bremsCorrectedPhotonEnergy"));
168  B2DEBUG(11, "\t\tWeightfile idx = " << index << " - (clusterTheta, p, charge) = (" << idx_theta << ", " << idx_p << ", " <<
169  idx_charge << ")");
170  if (!cutstr.empty()) {
171  B2DEBUG(11, "\tCategory cut: " << cutstr);
172  }
173 
174  // Fill the MVA::SingleDataset w/ variables and spectators.
175 
176  B2DEBUG(11, "\tMVA variables:");
177 
178  auto nvars = m_variables.at(index).size();
179  for (unsigned int ivar(0); ivar < nvars; ++ivar) {
180 
181  auto varobj = m_variables.at(index).at(ivar);
182 
183  double var = -999.0;
184  auto var_result = varobj->function(particle);
185  if (std::holds_alternative<double>(var_result)) {
186  var = std::get<double>(var_result);
187  } else if (std::holds_alternative<int>(var_result)) {
188  var = std::get<int>(var_result);
189  } else if (std::holds_alternative<bool>(var_result)) {
190  var = std::get<bool>(var_result);
191  } else {
192  B2ERROR("Variable '" << varobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
193  }
194 
195  // Manual imputation value of -999 for NaN (undefined) variables.
196  var = (std::isnan(var)) ? -999.0 : var;
197 
198  B2DEBUG(11, "\t\tvar[" << ivar << "] : " << varobj->name << " = " << var);
199 
200  m_datasets.at(index)->m_input[ivar] = var;
201 
202  }
203 
204  B2DEBUG(12, "\tMVA spectators:");
205 
206  auto nspecs = m_spectators.at(index).size();
207  for (unsigned int ispec(0); ispec < nspecs; ++ispec) {
208 
209  auto specobj = m_spectators.at(index).at(ispec);
210 
211  double spec = std::numeric_limits<double>::quiet_NaN();
212  auto spec_result = specobj->function(particle);
213  if (std::holds_alternative<double>(spec_result)) {
214  spec = std::get<double>(spec_result);
215  } else if (std::holds_alternative<int>(spec_result)) {
216  spec = std::get<int>(spec_result);
217  } else if (std::holds_alternative<bool>(spec_result)) {
218  spec = std::get<bool>(spec_result);
219  } else {
220  B2ERROR("Variable '" << specobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
221  }
222 
223  B2DEBUG(12, "\t\tspec[" << ispec << "] : " << specobj->name << " = " << spec);
224 
225  m_datasets.at(index)->m_spectators[ispec] = spec;
226 
227  }
228 
229  // Compute MVA score only if particle fulfils category selection.
230  if (!cutstr.empty()) {
231 
232  std::unique_ptr<Variable::Cut> cut = Variable::Cut::compile(cutstr);
233 
234  if (!cut->check(particle)) {
235  B2DEBUG(11, "\t\tParticle didn't pass MVA category cut, skip MVA application...");
236  continue;
237  }
238 
239  }
240 
241  float score = m_experts.at(index)->apply(*m_datasets.at(index))[0];
242 
243  B2DEBUG(11, "\tMVA score = " << score);
244  B2DEBUG(12, "\tExtraInfo: " << m_score_varname);
245 
246  // Store the MVA score as a new particle object property.
247  m_particles[particle->getArrayIndex()]->writeExtraInfo(m_score_varname, score);
248 
249  }
250 
251  }
252 }
253 
254 
256 {
257 
258  B2INFO("Run: " << m_event_metadata->getRun() << ". Load supported MVA interfaces for binary charged particle identification...");
259 
260  // The supported methods have to be initialized once (calling it more than once is safe).
262  auto supported_interfaces = MVA::AbstractInterface::getSupportedInterfaces();
263 
264  B2INFO("\tLoading weightfiles from the payload class for SIGNAL particle hypothesis: " << m_sig_pdg);
265 
266  auto serialized_weightfiles = (*m_weightfiles_representation.get())->getMVAWeights(m_sig_pdg);
267  auto nfiles = serialized_weightfiles->size();
268 
269  B2INFO("\tConstruct the MVA experts and datasets from N = " << nfiles << " weightfiles...");
270 
271  // The size of the vectors must correspond
272  // to the number of available weightfiles for this pdgId.
273  m_experts.resize(nfiles);
274  m_datasets.resize(nfiles);
275  m_variables.resize(nfiles);
276  m_spectators.resize(nfiles);
277 
278  for (unsigned int idx(0); idx < nfiles; idx++) {
279 
280  B2DEBUG(12, "\t\tweightfile[" << idx << "]");
281 
282  // De-serialize the string into an MVA::Weightfile object.
283  std::stringstream ss(serialized_weightfiles->at(idx));
284  auto weightfile = MVA::Weightfile::loadFromStream(ss);
285 
286  MVA::GeneralOptions general_options;
287  weightfile.getOptions(general_options);
288 
289  // Store the list of pointers to the relevant variables for this xml file.
291  m_variables[idx] = manager.getVariables(general_options.m_variables);
292  m_spectators[idx] = manager.getVariables(general_options.m_spectators);
293 
294  B2DEBUG(12, "\t\tRetrieved N = " << general_options.m_variables.size()
295  << " variables, N = " << general_options.m_spectators.size()
296  << " spectators");
297 
298  // Store an MVA::Expert object.
299  m_experts[idx] = supported_interfaces[general_options.m_method]->getExpert();
300  m_experts.at(idx)->load(weightfile);
301 
302  B2DEBUG(12, "\t\tweightfile loaded successfully into expert[" << idx << "]!");
303 
304  // Store an MVA::SingleDataset object, in which we will save our features later...
305  std::vector<float> v(general_options.m_variables.size(), 0.0);
306  std::vector<float> s(general_options.m_spectators.size(), 0.0);
307  m_datasets[idx] = std::make_unique<MVA::SingleDataset>(general_options, v, 1.0, s);
308 
309  B2DEBUG(12, "\t\tdataset[" << idx << "] created successfully!");
310 
311  }
312 
313 }
StoreObjPtr< EventMetaData > m_event_metadata
The event information.
int m_bkg_pdg
The input background mass hypothesis' pdgId.
std::vector< std::string > m_decayStrings
The input list of decay strings to which MVA weights will be applied.
bool m_ecl_only
Flag to specify if we use an ECL-only based training.
std::string m_score_varname
The lookup name of the MVA score variable, given the input S, B mass hypotheses for the algorithm.
std::unique_ptr< DBObjPtr< ChargedPidMVAWeights > > m_weightfiles_representation
Interface to get the database payload with the MVA weight files.
StoreArray< Particle > m_particles
StoreArray of Particles.
DatasetsList m_datasets
List of MVA::SingleDataset objects.
bool m_charge_independent
Flag to specify if we use a charge-independent training.
virtual void event() override
Called once for each event.
int m_sig_pdg
The input signal mass hypothesis' pdgId.
virtual void beginRun() override
Called once before a new run begins.
VariablesLists m_variables
List of lists of feature variables.
ChargedPidMVAModule()
Constructor, for setting module description and parameters.
VariablesLists m_spectators
List of lists of spectator variables.
ExpertsList m_experts
List of MVA::Expert objects.
std::string m_payload_name
The name of the database payload object with the MVA weights.
size_t size() const
Getter for number of detector IDs in this set.
Definition: UnitConst.cc:256
static DetectorSet set()
Accessor function for the set of valid detectors.
Definition: Const.h:255
The DecayDescriptor stores information about a decay tree or parts of a decay tree.
ECL cluster data.
Definition: ECLCluster.h:27
double getTheta() const
Return Corrected Theta of Shower (radian).
Definition: ECLCluster.h:304
static std::unique_ptr< GeneralCut > compile(const std::string &cut)
Creates an instance of a cut and returns a unique_ptr to it, if you need a copy-able object instead y...
Definition: GeneralCut.h:84
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition: Interface.h:53
static void initSupportedInterfaces()
Static function which initliazes all supported interfaces, has to be called once before getSupportedI...
Definition: Interface.cc:45
General options which are shared by all MVA trainings.
Definition: Options.h:62
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
Definition: Weightfile.cc:250
Base class for Modules.
Definition: Module.h:72
Class to store reconstructed particles.
Definition: Particle.h:74
const ECLCluster * getECLCluster() const
Returns the pointer to the ECLCluster object that was used to create this Particle (if ParticleType =...
Definition: Particle.cc:883
bool hasExtraInfo(const std::string &name) const
Return whether the extra info with the given name is set.
Definition: Particle.cc:1255
double getCharge(void) const
Returns particle charge.
Definition: Particle.cc:645
double getP() const
Returns momentum magnitude (same as getMomentumMagnitude but with shorter name)
Definition: Particle.h:515
int getArrayIndex() const
Returns this object's array index (in StoreArray), or -1 if not found.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
Global list of available variables.
Definition: Manager.h:101
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
REG_MODULE(B2BIIConvertBeamParams)
Register the module.
double charge(int pdgCode)
Returns electric charge of a particle with given pdg code.
Definition: EvtPDLUtil.cc:44
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:23