Belle II Software development
ECLChargedPIDMVAModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9/* Own header. */
10#include <ecl/modules/eclChargedPIDMVA/ECLChargedPIDMVAModule.h>
11
12/* Basf2 headers. */
13#include <analysis/VariableManager/Manager.h>
14#include <analysis/dataobjects/Particle.h>
15#include <framework/gearbox/Const.h>
16#include <framework/logging/Logger.h>
17#include <mdst/dataobjects/Track.h>
18#include <mva/interface/Interface.h>
19
20/* ROOT headers. */
21#include <TMath.h>
22
23/* C++ headers. */
24#include <limits>
25#include <string>
26
27using namespace Belle2;
29
30REG_MODULE(ECLChargedPIDMVA);
31
33{
34 setDescription("This module implements charged particle identification using ECL-related observables via a multiclass MVA. For each track a set of relevant ECL variables(shower shape, PSD etc.) are fed to the MVA method which is stored in a conditions database payload. The MVA output variables are then used to construct a likelihood from pdfs also stored in the payload. The likelihood is then stored in the ECLPidLikelihood object.");
36 addParam("payloadName",
38 "The name of the database payload object with the MVA weights.",
39 std::string("ECLChargedPidMVAWeights"));
40}
41
43
45{
46 m_eventMetaData.isRequired();
47
48 m_mvaWeights = std::make_unique<DBObjPtr<ECLChargedPIDMVAWeights>>(m_payload_name);
49
50 m_eclPidLikelihoods.registerInDataStore();
51 m_tracks.registerRelationTo(m_eclPidLikelihoods);
52}
53
55{
56 if (!m_mvaWeights) {B2FATAL("No ECLChargedPidMVAWeights payload found in database!");}
57}
58
60{
61 B2DEBUG(22, "Run: " << m_eventMetaData->getRun() <<
62 ". Loading supported MVA interfaces for multi-class charged particle identification.");
63
65 m_binningVariables = manager.getVariables((*m_mvaWeights.get())->getBinningVariables());
66
68 auto supported_interfaces = MVA::AbstractInterface::getSupportedInterfaces();
69
70 for (const auto& iterator : * (*m_mvaWeights.get())->getPhasespaceCategories()) {
71
72 B2DEBUG(22, "\t\tweightfile[" << iterator.first << "]");
73
74 // De-serialize the string into an MVA::Weightfile object.
75 std::stringstream ss(iterator.second.getSerialisedWeight());
76 auto weightfile = MVA::Weightfile::loadFromStream(ss);
77
78 MVA::GeneralOptions general_options;
79 weightfile.getOptions(general_options);
80
81 // Store an MVA::Expert object.
82 m_experts[iterator.first] = supported_interfaces[general_options.m_method]->getExpert();
83 m_experts.at(iterator.first)->load(weightfile);
84
85 B2DEBUG(22, "\t\tweightfile at " << iterator.first << " successfully initialised.");
86
87 // Get the full version of the variable names.
88 // These are stored in the xml as an additional vector.
89 const std::string identifier = std::string("de_aliased_clf_vars");
90 auto clf_vars = weightfile.getVector<std::string>(identifier);
91 m_variables[iterator.first] = manager.getVariables(clf_vars);
92
93 B2DEBUG(22, "\t\t Weightfile at " << iterator.first << " has " << m_variables[iterator.first].size() << " features.");
94
95 std::vector<float> features(clf_vars.size(), 0.0);
96 std::vector<float> spectators;
97
98 m_datasets[iterator.first] = std::make_unique<MVA::SingleDataset>(general_options, features, 1.0, spectators);
99 }
100}
101
103{
104 (*m_mvaWeights.get()).addCallback([this]() { checkDBPayloads();});
106 (*m_mvaWeights.get()).addCallback([this]() { initializeMVA(); });
108}
109
111{
112 for (const auto& track : m_tracks) {
113
114 // Load the pion fit hypothesis or closest in mass.
115 const TrackFitResult* fitRes = track.getTrackFitResultWithClosestMass(Const::pion);
116 if (fitRes == nullptr) continue;
117
118 // If a track omega value is very small trackFitResult might return a charge of 0.
119 // The particle is then not able to be assigned a pdg code causing a crash.
120 // Skip such tracks.
121 const int charge = fitRes->getChargeSign();
122 if (charge == 0) continue;
123
124 // Create a particle object so that we can use the variable manager.
125 const Particle particle = Particle(&track, Const::pion);
126
127 // Get global bin index for track corresponding to N dimensional binning.
128 std::vector<float> binningVariableValues(m_binningVariables.size());
129 for (unsigned int ivar(0); ivar < binningVariableValues.size(); ivar++) {
130 auto varobj = m_binningVariables.at(ivar);
131 binningVariableValues.at(ivar) = evaluateVariable(varobj, &particle);
132 }
133 const int linearCategoryIndex = (*m_mvaWeights.get())->getLinearisedCategoryIndex(binningVariableValues);
134
135 // Require we cover the phasespace.
136 // Alternatively could take closest covered region but behaviour will not be well understood.
137 // After this check the linearCategoryIndex is guaranteed to be positive.
138 if (!(*m_mvaWeights.get())->isPhasespaceCovered(linearCategoryIndex)) continue;
139
140 // Get the phasespaceCategory
141 const auto phasespaceCategory = (*m_mvaWeights.get())->getPhasespaceCategory(linearCategoryIndex);
142 const auto transformMode = phasespaceCategory->getTransformMode();
143
144 // Fill the feature vectors
145 unsigned int nvars = m_variables.at(linearCategoryIndex).size();
146 for (unsigned int ivar(0); ivar < nvars; ++ivar) {
147 auto varobj = m_variables.at(linearCategoryIndex).at(ivar);
148 m_datasets.at(linearCategoryIndex)->m_input[ivar] = evaluateVariable(varobj, &particle);
149 }
150
151 // Get the mva response to be converted to a likelihood
152 std::vector<float> scores = m_experts.at(linearCategoryIndex)->applyMulticlass(*m_datasets.at(linearCategoryIndex))[0];
153
154 // Log transform the scores
155 // This turns the MVA output which, if using the nominal TMVA MulticlassBDT,
156 // is heavily peaked at 0 and 1 into a smooth curve.
157 // We can then evaluate the likelihoods from this curve directly or further transform the responses.
158 for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
159 if ((transformMode != transformModes::c_DirectMVAResponse) and
160 (transformMode != transformModes::c_LogMVAResponse)) {
161 scores[iResponse] = logTransformation(scores[iResponse],
162 phasespaceCategory->getLogTransformOffset(),
163 phasespaceCategory->getMaxPossibleResponseValue());
164 }
165 }
166
167 B2DEBUG(22, "Scores: ");
168 for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
169 B2DEBUG(22, "\t\t " << iResponse << " : " << scores[iResponse]);
170 }
171
172 float logLikelihoods[Const::ChargedStable::c_SetSize];
173
174 // Order of loop is defined in UnitConst.cc: e, mu, pi, K, p, d
175 for (const auto& hypo : Const::chargedStableSet) {
176 unsigned int hypo_idx = hypo.getIndex();
177 auto absPdgId = abs(hypo.getPDGCode());
178
179 // Copy the scores so they aren't transformed in place
180 std::vector<float> transformed_scores;
181 transformed_scores = scores;
182
183 // Perform extra transformations if they are booked
184 if ((transformMode == transformModes::c_GaussianTransform) or
185 (transformMode == transformModes::c_DecorrelationTransform)) {
186
187 // Gaussian transform
188 for (unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
189 transformed_scores[iResponse] = gaussTransformation(scores[iResponse], phasespaceCategory->getCDF(absPdgId, iResponse));
190 }
191 if (transformMode == transformModes::c_DecorrelationTransform) {
192 transformed_scores = decorrTransformation(transformed_scores, phasespaceCategory->getDecorrelationMatrix(absPdgId));
193 }
194 }
195
196 // Get the pdf values for each response value
197 float logL = 0.0;
198 if ((transformMode == transformModes::c_DirectMVAResponse) or
199 (transformMode == transformModes::c_LogMVAResponse)) {
200
201 logLikelihoods[hypo_idx] = scores[phasespaceCategory->getMVAIndexForHypothesis(absPdgId)];
202 if (transformMode == transformModes::c_LogMVAResponse) {
203 logLikelihoods[hypo_idx] = (std::isnormal(logLikelihoods[hypo_idx])
204 && logLikelihoods[hypo_idx] > 0) ? std::log(logLikelihoods[hypo_idx]) : c_dummyLogL;
205 }
206 logLikelihoods[hypo_idx] = logLikelihoods[hypo_idx] / phasespaceCategory->getTemperature();
207 continue;
208 }
209 B2DEBUG(22, "MVA Index for hypo " << absPdgId << " : " << phasespaceCategory->getMVAIndexForHypothesis(absPdgId));
210
211 for (unsigned int iResponse = 0; iResponse < transformed_scores.size(); iResponse++) {
212 if ((transformMode == transformModes::c_LogTransformSingle) and
213 (phasespaceCategory->getMVAIndexForHypothesis(absPdgId) != iResponse)) {continue;}
214
215 double xmin, xmax;
216 phasespaceCategory->getPDF(iResponse, absPdgId)->GetRange(xmin, xmax);
217 // Kick the response back into the range of the PDF (alternatively consider logL = std::numeric_limits<float>::min() if outside range)
218 float transformed_score_copy = transformed_scores[iResponse];
219 if (transformed_score_copy < xmin) transformed_score_copy = xmin + 1e-5;
220 if (transformed_score_copy > xmax) transformed_score_copy = xmax - 1e-5;
221
222 float pdfval = phasespaceCategory->getPDF(iResponse, absPdgId)->Eval(transformed_score_copy);
223 // Don't take a log of inf or 0
224 logL += (std::isnormal(pdfval) && pdfval > 0) ? std::log(pdfval) : c_dummyLogL;
225 B2DEBUG(22, "MVA response index, MVA score, logL: " << iResponse << " " << transformed_score_copy << " " << logL << " \n");
226 }
227 logLikelihoods[hypo_idx] = logL;
228 } // hypo loop
229
230 B2DEBUG(22, "logL: ");
231 for (unsigned int iLogL = 0; iLogL < Const::ChargedStable::c_SetSize; iLogL++) {
232 B2DEBUG(22, "\t\t " << iLogL << " : " << logLikelihoods[iLogL]);
233 }
234
235 const auto eclPidLikelihood = m_eclPidLikelihoods.appendNew(logLikelihoods);
236
237 track.addRelationTo(eclPidLikelihood);
238 } // tracks
239}
240
241float ECLChargedPIDMVAModule::logTransformation(const float value, const float offset, const float max) const
242{
243 return std::log((value + offset) / (max + offset - value));
244}
245
246float ECLChargedPIDMVAModule::gaussTransformation(const float value, const TH1F* cdf) const
247{
248 int binIndex = cdf->GetXaxis()->FindBin(value);
249
250 double dx = cdf->GetBinWidth(binIndex);
251 double cumulant = cdf->GetBinContent(binIndex);
252 double dy;
253
254 if (binIndex == cdf->GetNbinsX()) {
255 dy = 1 - cdf->GetBinContent(binIndex);
256 } else if (binIndex == 0) {
257 dy = cdf->GetBinContent(binIndex);
258 } else {
259 dy = cdf->GetBinContent(binIndex + 1) - cdf->GetBinContent(binIndex);
260 }
261
262 cumulant = cumulant + (value - cdf->GetBinLowEdge(binIndex)) * dy / dx;
263
264 cumulant = std::min(cumulant, 1.0 - 1e-9);
265 cumulant = std::max(cumulant, 1e-9);
266
267 double maxErfInvArgRange = 0.99999999;
268 double arg = 2.0 * cumulant - 1.0;
269
270 arg = std::min(maxErfInvArgRange, arg);
271 arg = std::max(-maxErfInvArgRange, arg);
272
273 return c_sqrt2 * TMath::ErfInverse(arg);
274}
275
276
277std::vector<float> ECLChargedPIDMVAModule::decorrTransformation(const std::vector<float> scores,
278 const std::vector<float>* decorrelationMatrix) const
279{
280 unsigned int n_scores = scores.size();
281 std::vector<float> decor_scores(n_scores, 0.0);
282
283 for (unsigned int irow = 0; irow < n_scores; irow++) {
284 unsigned int offset = irow * n_scores;
285 for (unsigned int icol = 0; icol < n_scores; icol++) {
286 decor_scores[irow] += scores[irow] * decorrelationMatrix->at(icol + offset);
287 }
288 }
289 return decor_scores;
290}
291
293{
294 float val = std::numeric_limits<float>::quiet_NaN();
295 if (std::holds_alternative<double>(varobj->function(particle))) {
296 val = (float)std::get<double>(varobj->function(particle));
297 } else if (std::holds_alternative<int>(varobj->function(particle))) {
298 val = (float)std::get<int>(varobj->function(particle));
299 } else if (std::holds_alternative<bool>(varobj->function(particle))) {
300 val = (float)std::get<bool>(varobj->function(particle));
301 } else {
302 B2ERROR("Variable '" << varobj->name << "' has wrong data type! It must be one of double, integer, or bool.");
303 }
304 if (std::isnan(val)) {
305 //NaNs cause crashed if using TMVA MulticlassBDT.
306 //In case other MVA methods are used, NaNs likely should be masked properly also.
307 B2FATAL("Variable '" << varobj->name <<
308 "' has NaN entries. Variable definitions in the MVA payload should include 'ifNaNGiveX(X, DEFAULT)'" <<
309 "with a proper default value if it is possible for the variable to evaluate as NaN.");
310 }
311
312 return val;
313}
static const unsigned int c_SetSize
Number of elements (for use in array bounds etc.)
Definition: Const.h:615
static const ParticleSet chargedStableSet
set of charged stable particles
Definition: Const.h:618
static const ChargedStable pion
charged pion particle
Definition: Const.h:661
std::unordered_map< unsigned int, std::unique_ptr< MVA::Expert > > m_experts
unordered map of MVA experts.
std::vector< const Variable::Manager::Var * > m_binningVariables
Vector of variables used to define the regions in which the MVAs are trained.
void checkDBPayloads()
Check payloads for consistency.
std::vector< float > decorrTransformation(const std::vector< float > scores, const std::vector< float > *decorrelationMatrix) const
decorrelation transformation.
virtual void initialize() override
Use this to initialize resources or memory your module needs.
virtual void event() override
Called once for each event.
std::unordered_map< unsigned int, std::unique_ptr< MVA::SingleDataset > > m_datasets
MVA dataset to be passed to the expert.
static constexpr double c_sqrt2
Definition of sqrt(2)
StoreObjPtr< EventMetaData > m_eventMetaData
The event information.
void initializeMVA()
Initialise the multiclass MVAs.
virtual ~ECLChargedPIDMVAModule()
Destructor.
float evaluateVariable(const Variable::Manager::Var *varobj, const Particle *particle)
evaluate the variable for the particle and convert the return value float.
float logTransformation(const float value, const float offset, const float max) const
log transformation.
static constexpr float c_dummyLogL
Dummy value of log Likelihood for a particle hypothesis.
virtual void beginRun() override
Called once before a new run begins.
std::unordered_map< unsigned int, std::vector< const Variable::Manager::Var * > > m_variables
Map of vectors containing the variables objects to be fed to the MVA.
StoreArray< Track > m_tracks
array of track objects.
float gaussTransformation(const float value, const TH1F *cdf) const
gaussian transformation.
std::unique_ptr< DBObjPtr< ECLChargedPIDMVAWeights > > m_mvaWeights
Interface to DB payloads containing MVA weightfiles and pdfs.
StoreArray< ECLPidLikelihood > m_eclPidLikelihoods
Array of ECLPidLikelihood objects.
std::string m_payload_name
The name of the database payload object with the MVA weights.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
static void initSupportedInterfaces()
Static function which initliazes all supported interfaces, has to be called once before getSupportedI...
Definition: Interface.cc:45
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition: Interface.h:53
General options which are shared by all MVA trainings.
Definition: Options.h:62
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
Definition: Weightfile.cc:251
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
Class to store reconstructed particles.
Definition: Particle.h:75
Values of the result of a track fit with a given particle hypothesis.
Global list of available variables.
Definition: Manager.h:101
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
Abstract base class for different kinds of events.
std::string name
Unique identifier of the function, used as key.
Definition: Manager.h:130
A variable returning a floating-point value for a given Particle.
Definition: Manager.h:146
FunctionPtr function
Pointer to function.
Definition: Manager.h:147