Belle II Software development
ChargedPidMVAModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8//THIS MODULE
9#include <analysis/modules/ChargedParticleIdentificator/ChargedPidMVAModule.h>
10
11//ANALYSIS
12#include <analysis/dataobjects/Particle.h>
13#include <analysis/dataobjects/ParticleList.h>
14#include <analysis/dbobjects/ChargedPidMVAWeights.h>
15#include <analysis/DecayDescriptor/DecayDescriptor.h>
16#include <analysis/VariableManager/Utility.h>
17#include <analysis/variables/ECLVariables.h>
18
19// FRAMEWORK
20#include <framework/logging/LogConfig.h>
21#include <framework/logging/LogSystem.h>
22#include <mva/interface/Interface.h>
23
24using namespace Belle2;
25
26REG_MODULE(ChargedPidMVA);
27
29{
30 setDescription("This module evaluates the response of an MVA trained for binary charged particle identification between two hypotheses, S and B. For a given input set of (S,B) mass hypotheses, it takes the Particle objects in the appropriate charged stable particle's ParticleLists, calculates the MVA score using the appropriate xml weight file, and adds it as ExtraInfo to the Particle objects.");
31
32 setPropertyFlags(c_ParallelProcessingCertified);
33
34 addParam("sigHypoPDGCode",
35 m_sig_pdg,
36 "The input signal mass hypothesis' pdgId.",
37 int(0));
38 addParam("bkgHypoPDGCode",
39 m_bkg_pdg,
40 "The input background mass hypothesis' pdgId.",
41 int(0));
42 addParam("particleLists",
43 m_decayStrings,
44 "The input list of DecayStrings, where each selected (^) daughter should correspond to a standard charged ParticleList, e.g. ['Lambda0:sig -> ^p+ ^pi-', 'J/psi:sig -> ^mu+ ^mu-']. One can also directly pass a list of standard charged ParticleLists, e.g. ['e+:my_electrons', 'pi+:my_pions']. Note that charge-conjugated ParticleLists will automatically be included.",
45 std::vector<std::string>());
46 addParam("payloadName",
47 m_payload_name,
48 "The name of the database payload object with the MVA weights.",
49 std::string("ChargedPidMVAWeights"));
50 addParam("chargeIndependent",
51 m_charge_independent,
52 "Specify whether to use a charge-independent training of the MVA.",
53 bool(false));
54 addParam("useECLOnlyTraining",
55 m_ecl_only,
56 "Specify whether to use an ECL-only training of the MVA.",
57 bool(false));
58}
59
60
62
63
65{
66 m_event_metadata.isRequired();
67
68 m_weightfiles_representation = std::make_unique<DBObjPtr<ChargedPidMVAWeights>>(m_payload_name);
69
70 if (!(*m_weightfiles_representation.get())->isValidPdg(m_sig_pdg)) {
71 B2FATAL("PDG: " << m_sig_pdg <<
72 " of the signal mass hypothesis is not that of a valid particle in Const::chargedStableSet! Aborting...");
73 }
74 if (!(*m_weightfiles_representation.get())->isValidPdg(m_bkg_pdg)) {
75 B2FATAL("PDG: " << m_bkg_pdg <<
76 " of the background mass hypothesis is not that of a valid particle in Const::chargedStableSet! Aborting...");
77 }
78
79 /* Initialize MVA if the payload has changed and now. */
80 (*m_weightfiles_representation.get()).addCallback([this]() { initializeMVA(); });
82
83 m_score_varname = "pidPairChargedBDTScore_" + std::to_string(m_sig_pdg) + "_VS_" + std::to_string(m_bkg_pdg);
84
85 if (m_ecl_only) {
86 m_score_varname += "_" + std::to_string(Const::ECL);
87 } else {
88 for (const Const::EDetector& det : Const::PIDDetectorSet::set()) {
89 m_score_varname += "_" + std::to_string(det);
90 }
91 }
92}
93
94
96{
97}
98
99
101{
102
103 // Debug strings per log level.
104 std::map<int, std::string> debugStr = {
105 {11, ""},
106 {12, ""}
107 };
108
109 B2DEBUG(11, "EVENT: " << m_event_metadata->getEvent());
110
111 for (auto decayString : m_decayStrings) {
112
113 DecayDescriptor decayDescriptor;
114 decayDescriptor.init(decayString);
115 auto pListName = decayDescriptor.getMother()->getFullName();
116
117 unsigned short m_nSelectedDaughters = decayDescriptor.getSelectionNames().size();
118 StoreObjPtr<ParticleList> pList(pListName);
119
120 if (!pList) {
121 B2FATAL("ParticleList: " << pListName << " could not be found. Aborting...");
122 }
123
124 auto pListSize = pList->getListSize();
125
126 B2DEBUG(11, "ParticleList: " << pList->getParticleListName() << " - N = " << pListSize << " particles.");
127
128 const auto nTargetParticles = (m_nSelectedDaughters == 0) ? pListSize : pListSize * m_nSelectedDaughters;
129
130 // Need to get an absolute value in order to check if in Const::ChargedStable.
131 std::vector<int> pdgs;
132 if (m_nSelectedDaughters == 0) {
133 pdgs.push_back(pList->getPDGCode());
134 } else {
135 pdgs = decayDescriptor.getSelectionPDGCodes();
136 }
137 for (auto pdg : pdgs) {
138 // Check if this ParticleList is made up of legit Const::ChargedStable particles.
139 if (!(*m_weightfiles_representation.get())->isValidPdg(abs(pdg))) {
140 B2FATAL("PDG: " << pdg << " of ParticleList: " << pListName <<
141 " is not that of a valid particle in Const::chargedStableSet! Aborting...");
142 }
143 }
144 std::vector<const Particle*> targetParticles;
145 if (m_nSelectedDaughters > 0) {
146 for (unsigned int iPart(0); iPart < pListSize; ++iPart) {
147 auto* iParticle = pList->getParticle(iPart);
148 auto daughters = decayDescriptor.getSelectionParticles(iParticle);
149 for (auto* iDaughter : daughters) {
150 targetParticles.push_back(iDaughter);
151 }
152 }
153 }
154
155 for (unsigned int ipart(0); ipart < nTargetParticles; ++ipart) {
156
157 const Particle* particle = (m_nSelectedDaughters == 0) ? pList->getParticle(ipart) : targetParticles[ipart];
158
159 if (!(*m_weightfiles_representation.get())->hasImplicitNaNmasking()) {
160 // LEGACY TRAININGS: always require a track-cluster match.
161 const ECLCluster* eclCluster = particle->getECLCluster();
162 if (!eclCluster) {
163 B2DEBUG(11, "\nParticle [" << ipart << "] has invalid Track-ECLCluster relation, skip MVA application...");
164 continue;
165 }
166 }
167
168 // Retrieve the index for the correct MVA expert and dataset,
169 // given the reconstructed (polar angle, p, charge)
170 auto thVarName = (*m_weightfiles_representation.get())->getThetaVarName();
171 auto theta = std::get<double>(Variable::Manager::Instance().getVariable(thVarName)->function(particle));
172 auto p = particle->getP();
173 // Set a dummy charge of zero to pick charge-independent payloads, if requested.
174 auto charge = (!m_charge_independent) ? particle->getCharge() : 0.0;
175 if (std::isnan(theta) or std::isnan(p) or std::isnan(charge)) {
176 B2DEBUG(11, "\nParticle [" << ipart << "] has invalid input variable, skip MVA application..." <<
177 " polar angle: " << theta << ", p: " << p << ", charge: " << charge);
178 continue;
179 }
180
181 int idx_theta, idx_p, idx_charge;
182 auto index = (*m_weightfiles_representation.get())->getMVAWeightIdx(theta, p, charge, idx_theta, idx_p, idx_charge);
183
184 auto hasMatch = std::isnormal(Variable::eclClusterTrackMatched(particle));
185
186 debugStr[11] += "\n";
187 debugStr[11] += ("Particle [" + std::to_string(ipart) + "]\n");
188 debugStr[11] += ("Has ECL cluster match? " + std::to_string(hasMatch) + "\n");
189 debugStr[11] += ("polar angle: " + thVarName + " = " + std::to_string(theta) + " [rad]\n");
190 debugStr[11] += ("p = " + std::to_string(p) + " [GeV/c]\n");
192 debugStr[11] += ("charge = " + std::to_string(charge) + "\n");
193 }
194 debugStr[11] += ("Is brems corrected ? " + std::to_string(particle->hasExtraInfo("bremsCorrected")) + "\n");
195 debugStr[11] += ("Weightfile idx = " + std::to_string(index) + " - (polar angle, p, charge) = (" + std::to_string(
196 idx_theta) + ", " + std::to_string(idx_p) + ", " +
197 std::to_string(idx_charge) + ")\n");
198 if (m_cuts.at(index)) {
199 debugStr[11] += ("Category cut: " + m_cuts.at(index)->decompile() + "\n");
200 }
201
202 B2DEBUG(11, debugStr[11]);
203 debugStr[11].clear();
204
205 // Don't even bother if particle does not fulfil the category selection.
206 if (m_cuts.at(index)) {
207 if (!m_cuts.at(index)->check(particle)) {
208 B2DEBUG(11, "\nParticle [" << ipart << "] didn't pass MVA category cut, skip MVA application...");
209 continue;
210 }
211 }
212
213 // Fill the MVA::SingleDataset w/ variables and spectators.
214
215 debugStr[11] += "\n";
216 debugStr[11] += "MVA variables:\n";
217
218 auto nvars = m_variables.at(index).size();
219 for (unsigned int ivar(0); ivar < nvars; ++ivar) {
220
221 auto varobj = m_variables.at(index).at(ivar);
222
223 double var = std::numeric_limits<double>::quiet_NaN();
224 auto var_result = varobj->function(particle);
225 if (std::holds_alternative<double>(var_result)) {
226 var = std::get<double>(var_result);
227 } else if (std::holds_alternative<int>(var_result)) {
228 var = std::get<int>(var_result);
229 } else if (std::holds_alternative<bool>(var_result)) {
230 var = std::get<bool>(var_result);
231 } else {
232 B2ERROR("Variable '" << varobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
233 }
234
235 if (!(*m_weightfiles_representation.get())->hasImplicitNaNmasking()) {
236 // LEGACY TRAININGS: manual imputation value of -999 for NaN (undefined) variables. Needed by TMVA.
237 var = (std::isnan(var)) ? -999.0 : var;
238 }
239
240 debugStr[11] += ("\tvar[" + std::to_string(ivar) + "] : " + varobj->name + " = " + std::to_string(var) + "\n");
241
242 m_datasets.at(index)->m_input[ivar] = var;
243
244 }
245
246 B2DEBUG(11, debugStr[11]);
247 debugStr[11].clear();
248
249 // Check spectators only when in debug mode.
250 if (LogSystem::Instance().isLevelEnabled(LogConfig::c_Debug, 12)) {
251
252 debugStr[12] += "\n";
253 debugStr[12] += "MVA spectators:\n";
254
255 auto nspecs = m_spectators.at(index).size();
256 for (unsigned int ispec(0); ispec < nspecs; ++ispec) {
257
258 auto specobj = m_spectators.at(index).at(ispec);
259
260 double spec = std::numeric_limits<double>::quiet_NaN();
261 auto spec_result = specobj->function(particle);
262 if (std::holds_alternative<double>(spec_result)) {
263 spec = std::get<double>(spec_result);
264 } else if (std::holds_alternative<int>(spec_result)) {
265 spec = std::get<int>(spec_result);
266 } else if (std::holds_alternative<bool>(spec_result)) {
267 spec = std::get<bool>(spec_result);
268 } else {
269 B2ERROR("Variable '" << specobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
270 }
271
272 debugStr[12] += ("\tspec[" + std::to_string(ispec) + "] : " + specobj->name + " = " + std::to_string(spec) + "\n");
273
274 m_datasets.at(index)->m_spectators[ispec] = spec;
275
276 }
277
278 B2DEBUG(12, debugStr[12]);
279 debugStr[12].clear();
280
281 }
282
283 // Compute MVA score.
284
285 debugStr[11] += "\n";
286 debugStr[12] += "\n";
287 debugStr[11] += "MVA response:\n";
288
289 float score = m_experts.at(index)->apply(*m_datasets.at(index))[0];
290
291 debugStr[11] += ("\tscore = " + std::to_string(score));
292 debugStr[12] += ("\textraInfo: " + m_score_varname + "\n");
293
294 // Store the MVA score as a new particle object property.
295 m_particles[particle->getArrayIndex()]->writeExtraInfo(m_score_varname, score);
296
297 B2DEBUG(11, debugStr[11]);
298 B2DEBUG(12, debugStr[12]);
299 debugStr[11].clear();
300 debugStr[12].clear();
301
302 }
303
304 }
305
306 // Clear the debug string map before next event.
307 debugStr.clear();
308
309}
310
311
313{
314
315 std::map<std::string, std::string> aliasesLegacy;
316
317 aliasesLegacy.insert(std::make_pair("__event__", "evtNum"));
318
319 for (Const::DetectorSet::Iterator it = Const::PIDDetectorSet::set().begin();
320 it != Const::PIDDetectorSet::set().end(); ++it) {
321
322 auto detName = Const::parseDetectors(*it);
323
324 aliasesLegacy.insert(std::make_pair("missingLogL_" + detName, "pidMissingProbabilityExpert(" + detName + ")"));
325
326 for (auto& [pdgId, info] : m_stdChargedInfo) {
327
328 std::string alias = "deltaLogL_" + std::get<0>(info) + "_" + std::get<1>(info) + "_" + detName;
329 std::string var = "pidDeltaLogLikelihoodValueExpert(" + std::to_string(pdgId) + ", " + std::to_string(std::get<2>
330 (info)) + "," + detName + ")";
331
332 aliasesLegacy.insert(std::make_pair(alias, var));
333
334 if (it.getIndex() == 0) {
335 alias = "deltaLogL_" + std::get<0>(info) + "_" + std::get<1>(info) + "_ALL";
336 var = "pidDeltaLogLikelihoodValueExpert(" + std::to_string(pdgId) + ", " + std::to_string(std::get<2>(info)) + ", ALL)";
337 aliasesLegacy.insert(std::make_pair(alias, var));
338 }
339
340 }
341
342 }
343
344 B2INFO("Setting hard-coded aliases for the ChargedPidMVA algorithm.");
345
346 std::string debugStr("\n");
347 for (const auto& [alias, variable] : aliasesLegacy) {
348 debugStr += (alias + " --> " + variable + "\n");
349 if (!Variable::Manager::Instance().addAlias(alias, variable)) {
350 B2ERROR("Something went wrong with setting alias: " << alias << " for variable: " << variable);
351 }
352 }
353 B2DEBUG(10, debugStr);
354
355}
356
357
359{
360
361 auto aliases = (*m_weightfiles_representation.get())->getAliases();
362
363 if (!aliases->empty()) {
364
365 B2INFO("Setting aliases for the ChargedPidMVA algorithm read from the payload.");
366
367 std::string debugStr("\n");
368 for (const auto& [alias, variable] : *aliases) {
369 if (alias != variable) {
370 debugStr += (alias + " --> " + variable + "\n");
371 if (!Variable::Manager::Instance().addAlias(alias, variable)) {
372 B2ERROR("Something went wrong with setting alias: " << alias << " for variable: " << variable);
373 }
374 }
375 }
376 B2DEBUG(10, debugStr);
377
378 return;
379
380 }
381
382 // Manually set aliases - for bw compatibility
383 this->registerAliasesLegacy();
384
385}
386
387
389{
390
391 B2INFO("Run: " << m_event_metadata->getRun() << ". Load supported MVA interfaces for binary charged particle identification...");
392
393 // Set the necessary variable aliases from the payload.
394 this->registerAliases();
395
396 // The supported methods have to be initialized once (calling it more than once is safe).
398 auto supported_interfaces = MVA::AbstractInterface::getSupportedInterfaces();
399
400 B2INFO("\tLoading weightfiles from the payload class for SIGNAL particle hypothesis: " << m_sig_pdg);
401
402 auto serialized_weightfiles = (*m_weightfiles_representation.get())->getMVAWeights(m_sig_pdg);
403 auto nfiles = serialized_weightfiles->size();
404
405 B2INFO("\tConstruct the MVA experts and datasets from N = " << nfiles << " weightfiles...");
406
407 // The size of the vectors must correspond
408 // to the number of available weightfiles for this pdgId.
409 m_experts.resize(nfiles);
410 m_datasets.resize(nfiles);
411 m_cuts.resize(nfiles);
412 m_variables.resize(nfiles);
413 m_spectators.resize(nfiles);
414
415 for (unsigned int idx(0); idx < nfiles; idx++) {
416
417 B2DEBUG(12, "\t\tweightfile[" << idx << "]");
418
419 // De-serialize the string into an MVA::Weightfile object.
420 std::stringstream ss(serialized_weightfiles->at(idx));
421 auto weightfile = MVA::Weightfile::loadFromStream(ss);
422
423 MVA::GeneralOptions general_options;
424 weightfile.getOptions(general_options);
425
426 // Store the list of pointers to the relevant variables for this xml file.
427 Variable::Manager& manager = Variable::Manager::Instance();
428 m_variables[idx] = manager.getVariables(general_options.m_variables);
429 m_spectators[idx] = manager.getVariables(general_options.m_spectators);
430
431 B2DEBUG(12, "\t\tRetrieved N = " << general_options.m_variables.size()
432 << " variables, N = " << general_options.m_spectators.size()
433 << " spectators");
434
435 // Store an MVA::Expert object.
436 m_experts[idx] = supported_interfaces[general_options.m_method]->getExpert();
437 m_experts.at(idx)->load(weightfile);
438
439 B2DEBUG(12, "\t\tweightfile loaded successfully into expert[" << idx << "]!");
440
441 // Store an MVA::SingleDataset object, in which we will save our features later...
442 std::vector<float> v(general_options.m_variables.size(), 0.0);
443 std::vector<float> s(general_options.m_spectators.size(), 0.0);
444 m_datasets[idx] = std::make_unique<MVA::SingleDataset>(general_options, v, 1.0, s);
445
446 B2DEBUG(12, "\t\tdataset[" << idx << "] created successfully!");
447
448 // Compile cut for this category.
449 const auto cuts = (*m_weightfiles_representation.get())->getCuts(m_sig_pdg);
450 const auto cutstr = (!cuts->empty()) ? cuts->at(idx) : "";
451 m_cuts[idx] = (!cutstr.empty()) ? Variable::Cut::compile(cutstr) : nullptr;
452
453 B2DEBUG(12, "\t\tcut[" << idx << "] created successfully!");
454
455 }
456
457}
StoreObjPtr< EventMetaData > m_event_metadata
The event information.
int m_bkg_pdg
The input background mass hypothesis' pdgId.
std::vector< std::string > m_decayStrings
The input list of DecayStrings, where each selected (^) daughter should correspond to a standard char...
bool m_ecl_only
Flag to specify if we use an ECL-only based training.
std::string m_score_varname
The lookup name of the MVA score variable, given the input S, B mass hypotheses for the algorithm.
std::unique_ptr< DBObjPtr< ChargedPidMVAWeights > > m_weightfiles_representation
Interface to get the database payload with the MVA weight files.
StoreArray< Particle > m_particles
StoreArray of Particles.
DatasetsList m_datasets
List of MVA::SingleDataset objects.
std::map< int, std::tuple< std::string, std::string, int > > m_stdChargedInfo
Map with standard charged particles' info.
bool m_charge_independent
Flag to specify if we use a charge-independent training.
virtual void event() override
Called once for each event.
void registerAliases()
Set variable aliases needed by the MVA.
virtual void initialize() override
Use this to initialize resources or memory your module needs.
int m_sig_pdg
The input signal mass hypothesis' pdgId.
virtual void beginRun() override
Called once before a new run begins.
VariablesLists m_variables
List of lists of feature variables.
void registerAliasesLegacy()
Set variable aliases needed by the MVA.
ChargedPidMVAModule()
Constructor, for setting module description and parameters.
CutsList m_cuts
List of Cut objects.
VariablesLists m_spectators
List of lists of spectator variables.
virtual ~ChargedPidMVAModule()
Destructor, use this to clean up anything you created in the constructor.
ExpertsList m_experts
List of MVA::Expert objects.
std::string m_payload_name
The name of the database payload object with the MVA weights.
Iterator end() const
Ending iterator.
Definition UnitConst.cc:217
EDetector
Enum for identifying the detector components (detector and subdetector).
Definition Const.h:42
static std::string parseDetectors(EDetector det)
Converts Const::EDetector object to string.
Definition UnitConst.cc:161
static std::unique_ptr< GeneralCut > compile(const std::string &cut)
Definition GeneralCut.h:84
@ c_Debug
Debug: for code development.
Definition LogConfig.h:26
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition LogSystem.cc:28
static void initSupportedInterfaces()
Static function which initliazes all supported interfaces, has to be called once before getSupportedI...
Definition Interface.cc:46
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition Interface.h:53
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
Base class for Modules.
Definition Module.h:72
static Manager & Instance()
get singleton instance.
Definition Manager.cc:26
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition Module.h:649
double charge(int pdgCode)
Returns electric charge of a particle with given pdg code.
Definition EvtPDLUtil.cc:44
Abstract base class for different kinds of events.