Belle II Software development
Regression.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8#pragma once
9
10#include <mva/interface/Dataset.h>
11#include <mva/interface/Expert.h>
12#include <mva/interface/Options.h>
13#include <mva/interface/Teacher.h>
14#include <mva/interface/Weightfile.h>
15
16#include <framework/logging/Logger.h>
17
18#include <boost/property_tree/ptree_fwd.hpp>
19#include <boost/program_options.hpp>
20#include <vector>
21
22namespace Belle2 {
27 namespace MVA {
28
33 template<class BaseClassifierOptions>
35 public:
37 void load(const boost::property_tree::ptree& pt) override;
38
40 void save(boost::property_tree::ptree& pt) const override;
41
43 po::options_description getDescription() override;
44
46 const BaseClassifierOptions& getBaseClassifierOptions() const;
47
49 BaseClassifierOptions& getBaseClassifierOptions();
50
52 unsigned int getMaximalBinNumber() const;
53
55 void setMaximalBinNumber(unsigned int maximalBinNumber);
56
57 private:
59 BaseClassifierOptions m_baseClassifierOptions;
61 unsigned int m_numberOfBins = 4;
62 };
63
77 class RegressionDataSet : public Dataset {
78 public:
80 RegressionDataSet(const GeneralOptions& general_options, Dataset* dataSet, double cutValue);
81
83 unsigned int getNumberOfFeatures() const override;
84
86 unsigned int getNumberOfEvents() const override;
87
89 unsigned int getNumberOfSpectators() const override;
90
92 void loadEvent(unsigned int iEvent) override;
93
95 std::vector<float> getFeature(unsigned int iFeature) override;
96
98 std::vector<float> getSpectator(unsigned int iSpectator) override;
99
101 std::vector<float> getWeights() override;
102
104 std::vector<float> getTargets() override;
105
106 private:
109
112 };
113
138 template<class BaseClassifierTeacher, class RegressionClassifierOptions>
139 class RegressionTeacher : public Teacher {
140 public:
142 RegressionTeacher(const GeneralOptions& general_options,
143 const RegressionClassifierOptions& specific_options);
144
155 Weightfile train(Dataset& training_data) const override;
156
157 private:
159 RegressionClassifierOptions m_specific_options;
161 BaseClassifierTeacher m_baseClassifierTeacher;
162 };
163
178 template<class BaseClassifierExpert, class RegressionClassifierOptions>
179 class RegressionExpert : public Expert {
180 public:
182 void load(Weightfile& weightfile) override;
183
185 std::vector<float> apply(Dataset& test_data) const override;
186
187 private:
189 std::vector<BaseClassifierExpert> m_baseClassifierExperts;
190 };
191 }
193}
194
195
196template<class BaseClassifierOptions>
197void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::load(const boost::property_tree::ptree& pt)
198{
199 m_numberOfBins = pt.get<unsigned int>(getMethod() + "_numberOfBins");
200}
201
202template<class BaseClassifierOptions>
204{
205 return m_numberOfBins;
206}
207
208template<class BaseClassifierOptions>
210{
211 m_numberOfBins = maximalBinNumber;
212}
213
214template<class BaseClassifierOptions>
216{
217 return m_baseClassifierOptions;
218}
219
220template<class BaseClassifierOptions>
222{
223 return m_baseClassifierOptions;
224}
225
226template<class BaseClassifierOptions>
227void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::save(boost::property_tree::ptree& pt) const
228{
229 pt.put(getMethod() + "_numberOfBins", m_numberOfBins);
230}
231
232template<class BaseClassifierOptions>
234{
235 auto description = getBaseClassifierOptions().getDescription();
236
237 description.add_options()
238 ("numberOfBins", po::value<unsigned int>(&m_numberOfBins),
239 "Number of bins to split the target variable into");
240
241 return description;
242}
243
244template<class BaseClassifierTeacher, class RegressionClassifierOptions>
246 Belle2::MVA::Dataset& training_data) const
247{
248 Weightfile weightfile;
249 weightfile.addOptions(m_general_options);
250 weightfile.addOptions(m_specific_options);
251
252 for (unsigned int binNumber = 1; binNumber < m_specific_options.getMaximalBinNumber(); binNumber++) {
253 RegressionDataSet specificDataSet(m_general_options, &training_data,
254 1.0 / m_specific_options.getMaximalBinNumber() * binNumber);
255 Weightfile specificWeightFile = m_baseClassifierTeacher.train(specificDataSet);
256
257 std::string file = weightfile.generateFileName();
258 Weightfile::saveToXMLFile(specificWeightFile, file);
259 weightfile.addFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
260 }
261 return weightfile;
262}
263
264template<class BaseClassifierTeacher, class RegressionClassifierOptions>
266 const Belle2::MVA::GeneralOptions& general_options,
267 const RegressionClassifierOptions& specific_options) :
268 Teacher(general_options), m_specific_options(specific_options),
269 m_baseClassifierTeacher(general_options, specific_options.getBaseClassifierOptions())
270{
271}
272
273template<class BaseClassifierExpert, class RegressionClassifierOptions>
275{
276 RegressionClassifierOptions specific_options;
277 weightfile.getOptions(specific_options);
278
279 m_baseClassifierExperts.resize(specific_options.getMaximalBinNumber());
280 for (unsigned int binNumber = 1; binNumber < specific_options.getMaximalBinNumber(); binNumber++) {
281 std::string file = weightfile.generateFileName();
282 weightfile.getFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
283
284 auto specificWeightFile = Weightfile::loadFromXMLFile(file);
285 m_baseClassifierExperts[binNumber].load(specificWeightFile);
286 }
287}
288
289template<class BaseClassifierExpert, class RegressionClassifierOptions>
291 Dataset& test_data) const
292{
293 std::vector<float> sum;
294 for (const auto& expert : m_baseClassifierExperts) {
295 if (sum.empty()) {
296 // First time we do not need to add something, but just replace
297 sum = expert.apply(test_data);
298 continue;
299 }
300
301 const auto& expertResult = expert.apply(test_data);
302 if (sum.size() != expertResult.size()) {
303 B2FATAL("There is one expert in the list that returned not the same number of results than the others!");
304 }
305
306 for (unsigned int index = 0; index < sum.size(); index++) {
307 sum[index] += expertResult[index];
308 }
309 }
310
311 for (unsigned int index = 0; index < sum.size(); index++) {
312 sum[index] /= m_baseClassifierExperts.size();
313 }
314
315 return sum;
316}
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:33
Abstract base class of all Expert Each MVA library has its own implementation of this class,...
Definition: Expert.h:31
General options which are shared by all MVA trainings.
Definition: Options.h:62
Dataset needed during the training of a regression method.
Definition: Regression.h:77
std::vector< float > getTargets() override
Return the targets from the real dataset.
Definition: Regression.cc:61
unsigned int getNumberOfEvents() const override
Return the number of events from the real dataset.
Definition: Regression.cc:36
std::vector< float > getSpectator(unsigned int iSpectator) override
Return a specific spectator from the real dataset.
Definition: Regression.cc:51
unsigned int getNumberOfSpectators() const override
Return the number of spectators from the real dataset.
Definition: Regression.cc:41
unsigned int getNumberOfFeatures() const override
Return the number of features from the real dataset.
Definition: Regression.cc:31
std::vector< float > getFeature(unsigned int iFeature) override
Return a specific feature from the real dataset.
Definition: Regression.cc:46
std::vector< float > getWeights() override
Return the weights from the real dataset.
Definition: Regression.cc:56
void loadEvent(unsigned int iEvent) override
Load an event. Sets all internal variables and sets the isSignal variable dependent on the cut value.
Definition: Regression.cc:21
Dataset * m_childDataSet
The real data set (our child)
Definition: Regression.h:111
double m_cutValue
The cut value.
Definition: Regression.h:108
Generic expert for the regression applications.
Definition: Regression.h:179
std::vector< float > apply(Dataset &test_data) const override
Apply the loaded experts by averaging over the single expert decisions.
Definition: Regression.h:290
std::vector< BaseClassifierExpert > m_baseClassifierExperts
The list of single experts.
Definition: Regression.h:189
void load(Weightfile &weightfile) override
Load the expert from the weightfile by loading each stored single classifier one after the other.
Definition: Regression.h:274
Generic options of the Regression MVA methods hosting the number of bins (and the base classifier opt...
Definition: Regression.h:34
BaseClassifierOptions m_baseClassifierOptions
Options of the base classifier.
Definition: Regression.h:59
const BaseClassifierOptions & getBaseClassifierOptions() const
Return options of the base classifier (const version)
Definition: Regression.h:215
unsigned int getMaximalBinNumber() const
Return the number of bins to use.
Definition: Regression.h:203
BaseClassifierOptions & getBaseClassifierOptions()
Return options of the base classifier (non-const version)
Definition: Regression.h:221
unsigned int m_numberOfBins
How many bins to use.
Definition: Regression.h:61
po::options_description getDescription() override
Get the descriptions for these options.
Definition: Regression.h:233
void load(const boost::property_tree::ptree &pt) override
Load the options from a boost property tree. Only loads the maximal bin number.
Definition: Regression.h:197
void setMaximalBinNumber(unsigned int maximalBinNumber)
Set the number of bins to use.
Definition: Regression.h:209
void save(boost::property_tree::ptree &pt) const override
Save the options to a boost property tree. Only saves the maximal bin number.
Definition: Regression.h:227
Core class for the training of regression methods based on binary classifiers.
Definition: Regression.h:139
BaseClassifierTeacher m_baseClassifierTeacher
The teacher to use for training.
Definition: Regression.h:161
RegressionClassifierOptions m_specific_options
The method specific options.
Definition: Regression.h:159
RegressionTeacher(const GeneralOptions &general_options, const RegressionClassifierOptions &specific_options)
Create a new teacher out of the general and the regression method specific options.
Definition: Regression.h:265
Weightfile train(Dataset &training_data) const override
Call the train function.
Definition: Regression.h:245
Specific Options, all method Options have to inherit from this class.
Definition: Options.h:98
Abstract base class of all Teachers Each MVA library has its own implementation of this class,...
Definition: Teacher.h:29
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:115
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
Definition: Weightfile.cc:240
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:62
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:67
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:105
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:138
Abstract base class for different kinds of events.