Belle II Software development
MVAExpert.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8#include <tracking/trackingUtilities/mva/MVAExpert.h>
9
11#include <mva/dataobjects/DatabaseRepresentationOfWeightfile.h>
12#include <mva/interface/Expert.h>
13#include <mva/interface/Weightfile.h>
14#include <framework/database/DBObjPtr.h>
15
16namespace Belle2 {
22 namespace MVA {
23 class Expert;
24 class SingleDataset;
25 class Weightfile;
26 }
27
28 namespace TrackingUtilities {
31
32 public:
33 Impl(const std::string& identifier, std::vector<Named<Float_t*>> namedVariables);
34 void initialize();
35 void beginRun();
36 std::unique_ptr<MVA::Weightfile> getWeightFile();
37 double predict();
38 std::vector<float> predict(float* /* test_data */, int /* nFeature */, int /* nRows */);
39 std::vector<std::string> getVariableNames();
40 private:
42 std::vector<Named<Float_t*> > m_allNamedVariables;
43
45 std::vector<Named<Float_t*> > m_selectedNamedVariables;
46
48 std::unique_ptr<DBObjPtr<DatabaseRepresentationOfWeightfile> > m_weightfileRepresentation;
49
51 std::unique_ptr<MVA::Expert> m_expert;
52
54 std::unique_ptr<MVA::Dataset> m_dataset;
55
58
60 std::string m_identifier;
61 };
62 }
64}
65
67#include <mva/interface/Interface.h>
68
69#include <framework/utilities/FileSystem.h>
70#include <framework/logging/Logger.h>
71
72#include <algorithm>
73
74using namespace Belle2;
75using namespace TrackingUtilities;
76
77MVAExpert::Impl::Impl(const std::string& identifier,
78 std::vector<Named<Float_t*> > namedVariables)
79 : m_allNamedVariables(std::move(namedVariables))
80 , m_identifier(identifier)
81{
82}
83
85{
// NOTE(review): the function header and the beginning of the first condition
// are not present in this listing (the embedded numbering jumps 83 -> 85 -> 88).
// The definition index at the end of the listing attributes line 84 to
// Impl::initialize().
88 not(m_identifier.ends_with(".root") or m_identifier.ends_with(".xml"))) {
// The identifier does not look like a .root/.xml file name: create the
// database handle for the weight file stored under that identifier.
89 using DBWeightFileRepresentation = DBObjPtr<DatabaseRepresentationOfWeightfile>;
90 m_weightfileRepresentation = std::make_unique<DBWeightFileRepresentation>(m_identifier);
91 }
// A missing or invalid database handle is reported through B2FATAL.
// NOTE(review): for a .root/.xml identifier the visible code never creates the
// handle, so this check would also fire for file names - confirm that this is
// intended, given that getWeightFile() still has a file-based branch.
92 if ((not m_weightfileRepresentation) or (not m_weightfileRepresentation->isValid())) {
93 B2FATAL("No weight file could be loaded in tracking/trackingUtilities/mva/MVAExpert.");
94 }
95}
96
98{
// (Re)load the weight file, select the input variables it expects, create the
// expert and prepare a single-event dataset for predict().
// NOTE(review): the function header is not present in this listing; the
// definition index at the end attributes line 97 to Impl::beginRun().
99 std::unique_ptr<MVA::Weightfile> weightfile = getWeightFile();
100 if (weightfile) {
// The name-based variable selection is only performed for FastBDT
// (version 1 or 2) and Python weight files.
101 if ((weightfile->getElement<std::string>("method") == "FastBDT" and
102 (weightfile->getElement<int>("FastBDT_version") == 1 or
103 weightfile->getElement<int>("FastBDT_version") == 2)) or
104 (weightfile->getElement<std::string>("method") == "Python")) {
105
106 int nExpectedVars = weightfile->getElement<int>("number_feature_variables");
107
// For every "variable<i>" entry of the weight file, search the variable with
// the same name among all named variables.
// NOTE(review): the embedded numbering jumps 107 -> 109 and 112 -> 114; at
// least the second argument of std::find_if is missing from this listing.
109 for (int iVar = 0; iVar < nExpectedVars; ++iVar) {
110 std::string variableElementName = "variable" + std::to_string(iVar);
111 std::string expectedName = weightfile->getElement<std::string>(variableElementName);
112 auto itNamedVariable = std::find_if(m_allNamedVariables.begin(),
114 [expectedName](const Named<Float_t*>& namedVariable) {
115 return namedVariable.getName() == expectedName;
116 });
117
118 if (itNamedVariable == m_allNamedVariables.end()) {
119 B2ERROR("Variable name " << iVar << " mismatch for FastBDT. " <<
120 "Could not find expected variable '" << expectedName << "'");
121 }
// NOTE(review): when the lookup failed, itNamedVariable equals end() here and
// is dereferenced anyway. Unless B2ERROR does not return, this is undefined
// behaviour - the miss should skip the push_back or abort. TODO confirm and fix.
122 m_selectedNamedVariables.push_back(*itNamedVariable);
123 }
124 B2ASSERT("Number of variables mismatch", nExpectedVars == static_cast<int>(m_selectedNamedVariables.size()));
125 } else {
126 B2WARNING("Unpacked new kind of classifier. Consider to extend the feature variable check. Identifier name: " << m_identifier
127 << "; method name: " << weightfile->getElement<std::string>("method"));
// NOTE(review): line 128 is missing from this listing; as shown, this branch
// leaves m_selectedNamedVariables untouched.
129 }
130
// Pick the interface matching the method named in the weight file's general
// options and let it create and load the expert.
// NOTE(review): the initializer of supportedInterfaces (line 132) is missing
// from this listing. operator[] on the map would insert a null pointer for an
// unknown method name, which is then dereferenced - verify against the source.
131 std::map<std::string, MVA::AbstractInterface*> supportedInterfaces =
133 weightfile->getOptions(m_generalOptions);
134 m_expert = supportedInterfaces[m_generalOptions.m_method]->getExpert();
135 m_expert->load(*weightfile);
136
// Zero-filled single-event dataset with one slot per selected variable;
// predict() overwrites the slots before each evaluation.
137 std::vector<float> dummy;
138 dummy.resize(m_selectedNamedVariables.size(), 0);
139 m_dataset = std::make_unique<MVA::SingleDataset>(m_generalOptions, std::move(dummy), 0);
140 } else {
// No weight file: m_expert and m_dataset are not (re)assigned in this branch.
141 B2ERROR("Could not find weight file for identifier " << m_identifier);
142 }
143}
144
// Build the MVA::Weightfile either from the serialized data held by the
// database representation or from a file found via FileSystem::findFile().
// Returns a freshly allocated Weightfile owned by the caller.
// NOTE(review): the line opening the if-branch (line 147) is missing from this
// listing; presumably it tests m_weightfileRepresentation - confirm.
145std::unique_ptr<MVA::Weightfile> MVAExpert::Impl::getWeightFile()
146{
// Database case: deserialize the weight file from the stored m_data.
148 std::stringstream ss((*m_weightfileRepresentation)->m_data);
149 return std::make_unique<MVA::Weightfile>(MVA::Weightfile::loadFromStream(ss));
150 } else {
// File case: resolve the identifier to a path and load the file from there.
151 std::string weightFilePath = FileSystem::findFile(m_identifier);
152 return std::make_unique<MVA::Weightfile>(MVA::Weightfile::loadFromFile(weightFilePath));
153 }
154}
155
157{
158 if (not m_expert) {
159 B2ERROR("MVA Expert is not loaded! I will return 0");
160 return NAN;
161 }
162
163 // Transfer the extracted values to the data set were the expert can find them
164 for (unsigned int i = 0; i < m_selectedNamedVariables.size(); ++i) {
165 m_dataset->m_input[i] = *m_selectedNamedVariables[i];
166 }
167 return m_expert->apply(*m_dataset)[0];
168}
169
170std::vector<float> MVAExpert::Impl::predict(float* test_data, int nFeature, int nRows)
171{
172 std::vector<std::vector<float>> spectators;
173 std::vector<std::vector <float> > data;
174 data.resize(nRows);
175 for (int iRow = 0; iRow < nRows; iRow += 1) {
176 data[iRow].resize(nFeature);
177 for (int iFeature = 0; iFeature < nFeature; iFeature += 1) {
178 data[iRow][iFeature] = test_data[nFeature * iRow + iFeature];
179 }
180 }
181
182 MVA::MultiDataset dataSet(m_generalOptions, data, spectators);
183 return m_expert->apply(dataSet);
184}
185
186std::vector<std::string> MVAExpert::Impl::getVariableNames()
187{
188 std::vector<std::string> out(m_selectedNamedVariables.size());
189 for (size_t iName = 0; iName < m_selectedNamedVariables.size(); iName += 1) {
190 out[iName] = m_selectedNamedVariables[iName].getName();
191 }
192 return out;
193}
194
195
197// Silence Doxygen, which complains that "no matching class member found for"
198// the definition below. There may be a cleaner way to achieve this.
200MVAExpert::MVAExpert(const std::string& identifier,
201 std::vector<Named<Float_t*> > namedVariables)
202 : m_impl(std::make_unique<MVAExpert::Impl>(identifier, std::move(namedVariables)))
204{
205}
206
// Defaulted here in the source file rather than in the header because m_impl
// is a std::unique_ptr<Impl> and Impl is defined in this file.
207MVAExpert::~MVAExpert() = default;
208
210{
// Forward to the implementation object.
// NOTE(review): the function header is missing from this listing; the
// definition index attributes line 209 to MVAExpert::initialize().
211 return m_impl->initialize();
212}
213
215{
// Forward to the implementation object.
// NOTE(review): the function header is missing from this listing; the
// definition index attributes line 214 to MVAExpert::beginRun().
216 return m_impl->beginRun();
217}
218
220{
// Forward to the implementation object and return its single-event output.
// NOTE(review): the function header is missing from this listing; the
// definition index attributes line 219 to MVAExpert::predict().
221 return m_impl->predict();
222}
223
224std::vector<float> MVAExpert::predict(float* test_data, int nFeature, int nRows)
225{
226 return m_impl->predict(test_data, nFeature, nRows);
227}
228
229std::vector<std::string> MVAExpert::getVariableNames()
230{
231 return m_impl->getVariableNames();
232}
233
Class for accessing objects in the database.
Definition DBObjPtr.h:21
Database representation of a Weightfile object.
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
static void initSupportedInterfaces()
Static function which initializes all supported interfaces, has to be called once before getSupported...
Definition Interface.cc:46
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition Interface.h:53
Abstract base class of all Expert Each MVA library has its own implementation of this class,...
Definition Expert.h:31
General options which are shared by all MVA trainings.
Definition Options.h:62
Wraps the data of a multiple event into a Dataset.
Definition Dataset.h:186
Wraps the data of a single event into a Dataset.
Definition Dataset.h:135
The Weightfile class serializes all information about a training into an xml tree.
Definition Weightfile.h:38
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
static Weightfile loadFromFile(const std::string &filename)
Static function which loads a Weightfile from a file.
Implementation of the class to interact with the MVA package.
Definition MVAExpert.cc:30
void initialize()
Signal the beginning of the event processing.
Definition MVAExpert.cc:84
void beginRun()
Called once before a new run begins.
Definition MVAExpert.cc:97
std::unique_ptr< DBObjPtr< DatabaseRepresentationOfWeightfile > > m_weightfileRepresentation
Database pointer to the Database representation of the weightfile.
Definition MVAExpert.cc:48
std::unique_ptr< MVA::Weightfile > getWeightFile()
Get the weight file.
Definition MVAExpert.cc:145
std::unique_ptr< MVA::Expert > m_expert
Pointer to the current MVA Expert.
Definition MVAExpert.cc:51
std::vector< Named< Float_t * > > m_selectedNamedVariables
References to the selected named values from the source variable set.
Definition MVAExpert.cc:45
Impl(const std::string &identifier, std::vector< Named< Float_t * > > namedVariables)
constructor
Definition MVAExpert.cc:77
std::unique_ptr< MVA::Dataset > m_dataset
Pointer to the current dataset.
Definition MVAExpert.cc:54
std::vector< Named< Float_t * > > m_allNamedVariables
References to the all named values from the source variable set.
Definition MVAExpert.cc:42
MVA::GeneralOptions m_generalOptions
General options.
Definition MVAExpert.cc:57
double predict()
Get the MVA prediction.
Definition MVAExpert.cc:156
std::vector< std::string > getVariableNames()
Get selected variable names.
Definition MVAExpert.cc:186
std::string m_identifier
DB identifier of the expert or file name.
Definition MVAExpert.cc:60
void initialize()
Initialise the mva method.
Definition MVAExpert.cc:209
void beginRun()
Update the mva method to the new run.
Definition MVAExpert.cc:214
std::unique_ptr< Impl > m_impl
Pointer to implementation hiding the details.
Definition MVAExpert.h:59
~MVAExpert()
Destructor must be defined in cpp because of PImpl pointer.
MVAExpert(const std::string &identifier, std::vector< Named< Float_t * > > namedVariables)
Construct the Expert with the specified weight folder and the name of the training that was used in t...
double predict()
Evaluate the MVA method and return the MVAOutput.
Definition MVAExpert.cc:219
std::vector< std::string > getVariableNames()
Get selected variable names.
Definition MVAExpert.cc:229
A mixin class to attach a name to an object.
Definition Named.h:23
Abstract base class for different kinds of events.
STL namespace.