10 #include <ecl/modules/eclChargedPIDMVA/ECLChargedPIDMVAModule.h>
13 #include <analysis/VariableManager/Manager.h>
14 #include <analysis/dataobjects/Particle.h>
15 #include <framework/gearbox/Const.h>
16 #include <framework/logging/Logger.h>
17 #include <mdst/dataobjects/Track.h>
18 #include <mva/interface/Interface.h>
34 setDescription(
"This module implements charged particle identification using ECL-related observables via a multiclass MVA. For each track a set of relevant ECL variables(shower shape, PSD etc.) are fed to the MVA method which is stored in a conditions database payload. The MVA output variables are then used to construct a likelihood from pdfs also stored in the payload. The likelihood is then stored in the ECLPidLikelihood object.");
38 "The name of the database payload object with the MVA weights.",
39 std::string(
"ECLChargedPidMVAWeights"));
56 if (!
m_mvaWeights) {B2FATAL(
"No ECLChargedPidMVAWeights payload found in database!");}
62 ". Loading supported MVA interfaces for multi-class charged particle identification.");
70 for (
const auto& iterator : * (*
m_mvaWeights.get())->getPhasespaceCategories()) {
72 B2DEBUG(22,
"\t\tweightfile[" << iterator.first <<
"]");
75 std::stringstream ss(iterator.second.getSerialisedWeight());
79 weightfile.getOptions(general_options);
82 m_experts[iterator.first] = supported_interfaces[general_options.m_method]->getExpert();
83 m_experts.at(iterator.first)->load(weightfile);
85 B2DEBUG(22,
"\t\tweightfile at " << iterator.first <<
" successfully initialised.");
89 const std::string identifier = std::string(
"de_aliased_clf_vars");
90 auto clf_vars = weightfile.getVector<std::string>(identifier);
91 m_variables[iterator.first] = manager.getVariables(clf_vars);
93 B2DEBUG(22,
"\t\t Weightfile at " << iterator.first <<
" has " <<
m_variables[iterator.first].size() <<
" features.");
95 std::vector<float> features(clf_vars.size(), 0.0);
96 std::vector<float> spectators;
98 m_datasets[iterator.first] = std::make_unique<MVA::SingleDataset>(general_options, features, 1.0, spectators);
112 for (
const auto& track :
m_tracks) {
116 if (fitRes ==
nullptr)
continue;
121 const int charge = fitRes->getChargeSign();
122 if (charge == 0)
continue;
129 for (
unsigned int ivar(0); ivar < binningVariableValues.size(); ivar++) {
133 const int linearCategoryIndex = (*
m_mvaWeights.get())->getLinearisedCategoryIndex(binningVariableValues);
138 if (!(*
m_mvaWeights.get())->isPhasespaceCovered(linearCategoryIndex))
continue;
141 const auto phasespaceCategory = (*
m_mvaWeights.get())->getPhasespaceCategory(linearCategoryIndex);
142 const auto transformMode = phasespaceCategory->getTransformMode();
145 unsigned int nvars =
m_variables.at(linearCategoryIndex).size();
146 for (
unsigned int ivar(0); ivar < nvars; ++ivar) {
147 auto varobj =
m_variables.at(linearCategoryIndex).at(ivar);
152 std::vector<float> scores =
m_experts.at(linearCategoryIndex)->applyMulticlass(*
m_datasets.at(linearCategoryIndex))[0];
158 for (
unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
159 if ((transformMode != transformModes::c_DirectMVAResponse) and
160 (transformMode != transformModes::c_LogMVAResponse)) {
162 phasespaceCategory->getLogTransformOffset(),
163 phasespaceCategory->getMaxPossibleResponseValue());
167 B2DEBUG(22,
"Scores: ");
168 for (
unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
169 B2DEBUG(22,
"\t\t " << iResponse <<
" : " << scores[iResponse]);
176 unsigned int hypo_idx = hypo.getIndex();
177 auto absPdgId = abs(hypo.getPDGCode());
180 std::vector<float> transformed_scores;
181 transformed_scores = scores;
184 if ((transformMode == transformModes::c_GaussianTransform) or
185 (transformMode == transformModes::c_DecorrelationTransform)) {
188 for (
unsigned int iResponse = 0; iResponse < scores.size(); iResponse++) {
189 transformed_scores[iResponse] =
gaussTransformation(scores[iResponse], phasespaceCategory->getCDF(absPdgId, iResponse));
191 if (transformMode == transformModes::c_DecorrelationTransform) {
192 transformed_scores =
decorrTransformation(transformed_scores, phasespaceCategory->getDecorrelationMatrix(absPdgId));
198 if ((transformMode == transformModes::c_DirectMVAResponse) or
199 (transformMode == transformModes::c_LogMVAResponse)) {
201 logLikelihoods[hypo_idx] = scores[phasespaceCategory->getMVAIndexForHypothesis(absPdgId)];
202 if (transformMode == transformModes::c_LogMVAResponse) {
203 logLikelihoods[hypo_idx] = (std::isnormal(logLikelihoods[hypo_idx])
204 && logLikelihoods[hypo_idx] > 0) ? std::log(logLikelihoods[hypo_idx]) :
c_dummyLogL;
206 logLikelihoods[hypo_idx] = logLikelihoods[hypo_idx] / phasespaceCategory->getTemperature();
209 B2DEBUG(22,
"MVA Index for hypo " << absPdgId <<
" : " << phasespaceCategory->getMVAIndexForHypothesis(absPdgId));
211 for (
unsigned int iResponse = 0; iResponse < transformed_scores.size(); iResponse++) {
212 if ((transformMode == transformModes::c_LogTransformSingle) and
213 (phasespaceCategory->getMVAIndexForHypothesis(absPdgId) != iResponse)) {
continue;}
216 phasespaceCategory->getPDF(iResponse, absPdgId)->GetRange(xmin, xmax);
218 float transformed_score_copy = transformed_scores[iResponse];
219 if (transformed_score_copy < xmin) transformed_score_copy = xmin + 1e-5;
220 if (transformed_score_copy > xmax) transformed_score_copy = xmax - 1e-5;
222 float pdfval = phasespaceCategory->getPDF(iResponse, absPdgId)->Eval(transformed_score_copy);
224 logL += (std::isnormal(pdfval) && pdfval > 0) ? std::log(pdfval) :
c_dummyLogL;
225 B2DEBUG(22,
"MVA response index, MVA score, logL: " << iResponse <<
" " << transformed_score_copy <<
" " << logL <<
" \n");
227 logLikelihoods[hypo_idx] = logL;
230 B2DEBUG(22,
"logL: ");
232 B2DEBUG(22,
"\t\t " << iLogL <<
" : " << logLikelihoods[iLogL]);
237 track.addRelationTo(eclPidLikelihood);
243 return std::log((value + offset) / (max + offset - value));
248 int binIndex = cdf->GetXaxis()->FindBin(value);
250 double dx = cdf->GetBinWidth(binIndex);
251 double cumulant = cdf->GetBinContent(binIndex);
254 if (binIndex == cdf->GetNbinsX()) {
255 dy = 1 - cdf->GetBinContent(binIndex);
256 }
else if (binIndex == 0) {
257 dy = cdf->GetBinContent(binIndex);
259 dy = cdf->GetBinContent(binIndex + 1) - cdf->GetBinContent(binIndex);
262 cumulant = cumulant + (value - cdf->GetBinLowEdge(binIndex)) * dy / dx;
264 cumulant = std::min(cumulant, 1.0 - 1e-9);
265 cumulant = std::max(cumulant, 1e-9);
267 double maxErfInvArgRange = 0.99999999;
268 double arg = 2.0 * cumulant - 1.0;
270 arg = std::min(maxErfInvArgRange, arg);
271 arg = std::max(-maxErfInvArgRange, arg);
273 return c_sqrt2 * TMath::ErfInverse(arg);
278 const std::vector<float>* decorrelationMatrix)
const
280 unsigned int n_scores = scores.size();
281 std::vector<float> decor_scores(n_scores, 0.0);
283 for (
unsigned int irow = 0; irow < n_scores; irow++) {
284 unsigned int offset = irow * n_scores;
285 for (
unsigned int icol = 0; icol < n_scores; icol++) {
286 decor_scores[irow] += scores[irow] * decorrelationMatrix->at(icol + offset);
294 float val = std::numeric_limits<float>::quiet_NaN();
295 if (std::holds_alternative<double>(varobj->
function(particle))) {
296 val = (float)std::get<double>(varobj->
function(particle));
297 }
else if (std::holds_alternative<int>(varobj->
function(particle))) {
298 val = (float)std::get<int>(varobj->
function(particle));
299 }
else if (std::holds_alternative<bool>(varobj->
function(particle))) {
300 val = (float)std::get<bool>(varobj->
function(particle));
302 B2ERROR(
"Variable '" << varobj->
name <<
"' has wrong data type! It must be one of double, integer, or bool.");
304 if (std::isnan(val)) {
307 B2FATAL(
"Variable '" << varobj->
name <<
308 "' has NaN entries. Variable definitions in the MVA payload should include 'ifNaNGiveX(X, DEFAULT)'" <<
309 "with a proper default value if it is possible for the variable to evaluate as NaN.");
static const unsigned int c_SetSize
Number of elements (for use in array bounds etc.)
static const ParticleSet chargedStableSet
set of charged stable particles
static const ChargedStable pion
charged pion particle
std::unordered_map< unsigned int, std::unique_ptr< MVA::Expert > > m_experts
unordered map of MVA experts.
std::vector< const Variable::Manager::Var * > m_binningVariables
Vector of variables used to define the regions in which the MVAs are trained.
void checkDBPayloads()
Check payloads for consistency.
std::vector< float > decorrTransformation(const std::vector< float > scores, const std::vector< float > *decorrelationMatrix) const
decorrelation transformation.
virtual void initialize() override
Use this to initialize resources or memory your module needs.
virtual void event() override
Called once for each event.
std::unordered_map< unsigned int, std::unique_ptr< MVA::SingleDataset > > m_datasets
MVA dataset to be passed to the expert.
ECLChargedPIDMVAModule()
Constructor.
static constexpr double c_sqrt2
Definition of sqrt(2)
StoreObjPtr< EventMetaData > m_eventMetaData
The event information.
void initializeMVA()
Initialise the multiclass MVAs.
virtual ~ECLChargedPIDMVAModule()
Destructor.
float evaluateVariable(const Variable::Manager::Var *varobj, const Particle *particle)
evaluate the variable for the particle and convert the return value float.
float logTransformation(const float value, const float offset, const float max) const
log transformation.
static constexpr float c_dummyLogL
Dummy value of log Likelihood for a particle hypothesis.
virtual void beginRun() override
Called once before a new run begins.
std::unordered_map< unsigned int, std::vector< const Variable::Manager::Var * > > m_variables
Map of vectors containing the variables objects to be fed to the MVA.
StoreArray< Track > m_tracks
array of track objects.
float gaussTransformation(const float value, const TH1F *cdf) const
gaussian transformation.
std::unique_ptr< DBObjPtr< ECLChargedPIDMVAWeights > > m_mvaWeights
Interface to DB payloads containing MVA weightfiles and pdfs.
StoreArray< ECLPidLikelihood > m_eclPidLikelihoods
Array of ECLPidLikelihood objects.
std::string m_payload_name
The name of the database payload object with the MVA weights.
MVAResponseTransformMode
Enum of implemented transformations which can be applied to the MVA response.
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
static void initSupportedInterfaces()
Static function which initliazes all supported interfaces, has to be called once before getSupportedI...
General options which are shared by all MVA trainings.
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Class to store reconstructed particles.
Values of the result of a track fit with a given particle hypothesis.
Global list of available variables.
static Manager & Instance()
get singleton instance.
REG_MODULE(arichBtest)
Register the Module.
void addParam(const std::string &name, T ¶mVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Abstract base class for different kinds of events.
std::string name
Unique identifier of the function, used as key.
A variable returning a floating-point value for a given Particle.
FunctionPtr function
Pointer to function.