Belle II Software  release-05-02-19
Regression.h
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2018 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Nils Braun *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 #pragma once
11 
12 #include <mva/interface/Dataset.h>
13 #include <mva/interface/Expert.h>
14 #include <mva/interface/Options.h>
15 #include <mva/interface/Teacher.h>
16 #include <mva/interface/Weightfile.h>
17 
18 #include <framework/logging/Logger.h>
19 
20 #include <boost/property_tree/ptree_fwd.hpp>
21 #include <boost/program_options.hpp>
22 #include <vector>
23 
24 namespace Belle2 {
29  namespace MVA {
30 
35  template<class BaseClassifierOptions>
36  class RegressionOptions : public SpecificOptions {
37  public:
39  void load(const boost::property_tree::ptree& pt) override;
40 
42  void save(boost::property_tree::ptree& pt) const override;
43 
45  po::options_description getDescription() override;
46 
48  const BaseClassifierOptions& getBaseClassifierOptions() const;
49 
51  BaseClassifierOptions& getBaseClassifierOptions();
52 
54  unsigned int getMaximalBinNumber() const;
55 
57  void setMaximalBinNumber(unsigned int maximalBinNumber);
58 
59  private:
61  BaseClassifierOptions m_baseClassifierOptions;
63  unsigned int m_numberOfBins = 4;
64  };
65 
79  class RegressionDataSet : public Dataset {
80  public:
82  RegressionDataSet(const GeneralOptions& general_options, Dataset* dataSet, double cutValue);
83 
85  unsigned int getNumberOfFeatures() const override;
86 
88  unsigned int getNumberOfEvents() const override;
89 
91  unsigned int getNumberOfSpectators() const override;
92 
94  void loadEvent(unsigned int iEvent) override;
95 
97  std::vector<float> getFeature(unsigned int iFeature) override;
98 
100  std::vector<float> getSpectator(unsigned int iSpectator) override;
101 
103  std::vector<float> getWeights() override;
104 
106  std::vector<float> getTargets() override;
107 
108  private:
110  double m_cutValue;
111 
114  };
115 
140  template<class BaseClassifierTeacher, class RegressionClassifierOptions>
141  class RegressionTeacher : public Teacher {
142  public:
144  RegressionTeacher(const GeneralOptions& general_options,
145  const RegressionClassifierOptions& specific_options);
146 
157  Weightfile train(Dataset& training_data) const override;
158 
159  private:
161  RegressionClassifierOptions m_specific_options;
163  BaseClassifierTeacher m_baseClassifierTeacher;
164  };
165 
180  template<class BaseClassifierExpert, class RegressionClassifierOptions>
181  class RegressionExpert : public Expert {
182  public:
184  void load(Weightfile& weightfile) override;
185 
187  std::vector<float> apply(Dataset& test_data) const override;
188 
189  private:
191  std::vector<BaseClassifierExpert> m_baseClassifierExperts;
192  };
193  }
195 }
196 
197 
198 template<class BaseClassifierOptions>
199 void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::load(const boost::property_tree::ptree& pt)
200 {
201  m_numberOfBins = pt.get<unsigned int>(getMethod() + "_numberOfBins");
202 }
203 
204 template<class BaseClassifierOptions>
206 {
207  return m_numberOfBins;
208 }
209 
210 template<class BaseClassifierOptions>
212 {
213  m_numberOfBins = maximalBinNumber;
214 }
215 
216 template<class BaseClassifierOptions>
218 {
219  return m_baseClassifierOptions;
220 }
221 
222 template<class BaseClassifierOptions>
224 {
225  return m_baseClassifierOptions;
226 }
227 
228 template<class BaseClassifierOptions>
229 void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::save(boost::property_tree::ptree& pt) const
230 {
231  pt.put(getMethod() + "_numberOfBins", m_numberOfBins);
232 }
233 
234 template<class BaseClassifierOptions>
236 {
237  auto description = getBaseClassifierOptions().getDescription();
238 
239  description.add_options()
240  ("numberOfBins", po::value<unsigned int>(&m_numberOfBins),
241  "Number of bins to split the target variable into");
242 
243  return description;
244 }
245 
246 template<class BaseClassifierTeacher, class RegressionClassifierOptions>
248  Belle2::MVA::Dataset& training_data) const
249 {
250  Weightfile weightfile;
251  weightfile.addOptions(m_general_options);
252  weightfile.addOptions(m_specific_options);
253 
254  for (unsigned int binNumber = 1; binNumber < m_specific_options.getMaximalBinNumber(); binNumber++) {
255  RegressionDataSet specificDataSet(m_general_options, &training_data,
256  1.0 / m_specific_options.getMaximalBinNumber() * binNumber);
257  Weightfile specificWeightFile = m_baseClassifierTeacher.train(specificDataSet);
258 
259  std::string file = weightfile.generateFileName();
260  Weightfile::saveToXMLFile(specificWeightFile, file);
261  weightfile.addFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
262  }
263  return weightfile;
264 }
265 
266 template<class BaseClassifierTeacher, class RegressionClassifierOptions>
268  const Belle2::MVA::GeneralOptions& general_options,
269  const RegressionClassifierOptions& specific_options) :
270  Teacher(general_options), m_specific_options(specific_options),
271  m_baseClassifierTeacher(general_options, specific_options.getBaseClassifierOptions())
272 {
273 }
274 
275 template<class BaseClassifierExpert, class RegressionClassifierOptions>
277 {
278  RegressionClassifierOptions specific_options;
279  weightfile.getOptions(specific_options);
280 
281  m_baseClassifierExperts.resize(specific_options.getMaximalBinNumber());
282  for (unsigned int binNumber = 1; binNumber < specific_options.getMaximalBinNumber(); binNumber++) {
283  std::string file = weightfile.generateFileName();
284  weightfile.getFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
285 
286  auto specificWeightFile = Weightfile::loadFromXMLFile(file);
287  m_baseClassifierExperts[binNumber].load(specificWeightFile);
288  }
289 }
290 
291 template<class BaseClassifierExpert, class RegressionClassifierOptions>
293  Dataset& test_data) const
294 {
295  std::vector<float> sum;
296  for (const auto& expert : m_baseClassifierExperts) {
297  if (sum.empty()) {
298  // First time we do not need to add something, but just replace
299  sum = expert.apply(test_data);
300  continue;
301  }
302 
303  const auto& expertResult = expert.apply(test_data);
304  if (sum.size() != expertResult.size()) {
305  B2FATAL("There is one expert in the list that returned not the same number of results than the others!");
306  }
307 
308  for (unsigned int index = 0; index < sum.size(); index++) {
309  sum[index] += expertResult[index];
310  }
311  }
312 
313  for (unsigned int index = 0; index < sum.size(); index++) {
314  sum[index] /= m_baseClassifierExperts.size();
315  }
316 
317  return sum;
318 }
Belle2::MVA::RegressionOptions::setMaximalBinNumber
void setMaximalBinNumber(unsigned int maximalBinNumber)
Set the number of bins to use.
Definition: Regression.h:211
Belle2::MVA::RegressionTeacher::train
Weightfile train(Dataset &training_data) const override
Call the train function.
Definition: Regression.h:247
Belle2::MVA::RegressionDataSet::getSpectator
std::vector< float > getSpectator(unsigned int iSpectator) override
Return a specific spectator from the real dataset.
Definition: Regression.cc:53
Belle2::MVA::RegressionDataSet
Dataset needed during the training of a regression method.
Definition: Regression.h:87
Belle2::MVA::Dataset
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:34
Belle2::MVA::Weightfile::getOptions
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:76
Belle2::MVA::RegressionDataSet::RegressionDataSet
RegressionDataSet(const GeneralOptions &general_options, Dataset *dataSet, double cutValue)
Create a new regression data set out of the general options, a pointer to the real dataset and the cu...
Definition: Regression.cc:18
Belle2::MVA::RegressionExpert::m_baseClassifierExperts
std::vector< BaseClassifierExpert > m_baseClassifierExperts
The list of single experts.
Definition: Regression.h:199
Belle2::MVA::Weightfile::addFile
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:124
Belle2::MVA::Weightfile
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:40
Belle2::MVA::RegressionOptions::m_baseClassifierOptions
BaseClassifierOptions m_baseClassifierOptions
Options of the base classifier.
Definition: Regression.h:69
Belle2::MVA::RegressionTeacher::m_baseClassifierTeacher
BaseClassifierTeacher m_baseClassifierTeacher
The teacher to use for training.
Definition: Regression.h:171
Belle2::MVA::RegressionDataSet::getWeights
std::vector< float > getWeights() override
Return the weights from the real dataset.
Definition: Regression.cc:58
Belle2::MVA::RegressionDataSet::getNumberOfFeatures
unsigned int getNumberOfFeatures() const override
Return the number of features from the real dataset.
Definition: Regression.cc:33
Belle2::MVA::RegressionTeacher::RegressionTeacher
RegressionTeacher(const GeneralOptions &general_options, const RegressionClassifierOptions &specific_options)
Create a new teacher out of the general and the regression method specific options.
Definition: Regression.h:267
Belle2::MVA::RegressionDataSet::getFeature
std::vector< float > getFeature(unsigned int iFeature) override
Return a specific feature from the real dataset.
Definition: Regression.cc:48
Belle2::MVA::RegressionOptions::load
void load(const boost::property_tree::ptree &pt) override
Load the options from a boost property tree. Only loads the maximal bin number.
Definition: Regression.h:199
Belle2::MVA::RegressionDataSet::getTargets
std::vector< float > getTargets() override
Return the targets from the real dataset.
Definition: Regression.cc:63
Belle2::MVA::RegressionDataSet::m_childDataSet
Dataset * m_childDataSet
The real data set (our child)
Definition: Regression.h:121
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::RegressionExpert::apply
std::vector< float > apply(Dataset &test_data) const override
Apply the loaded experts by averaging over the single expert decisions.
Definition: Regression.h:292
Belle2::MVA::RegressionExpert::load
void load(Weightfile &weightfile) override
Load the expert from the weightfile by loading each stored single classifier one after the other.
Definition: Regression.h:276
Belle2::MVA::Teacher
Abstract base class of all Teachers Each MVA library has its own implementation of this class,...
Definition: Teacher.h:31
Belle2::MVA::RegressionDataSet::getNumberOfEvents
unsigned int getNumberOfEvents() const override
Return the number of events from the real dataset.
Definition: Regression.cc:38
Belle2::MVA::Weightfile::addOptions
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:71
Belle2::MVA::RegressionOptions::m_numberOfBins
unsigned int m_numberOfBins
How many bins to use.
Definition: Regression.h:71
Belle2::MVA::Weightfile::loadFromXMLFile
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
Definition: Weightfile.cc:249
Belle2::MVA::GeneralOptions
General options which are shared by all MVA trainings.
Definition: Options.h:64
Belle2::MVA::RegressionTeacher::m_specific_options
RegressionClassifierOptions m_specific_options
The method specific options.
Definition: Regression.h:169
Belle2::MVA::Weightfile::getFile
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:147
Belle2::MVA::RegressionOptions::getMaximalBinNumber
unsigned int getMaximalBinNumber() const
Return the number of bins to use.
Definition: Regression.h:205
Belle2::MVA::RegressionDataSet::m_cutValue
double m_cutValue
The cut value.
Definition: Regression.h:118
Belle2::MVA::RegressionDataSet::getNumberOfSpectators
unsigned int getNumberOfSpectators() const override
Return the number of spectators from the real dataset.
Definition: Regression.cc:43
Belle2::MVA::RegressionOptions::getDescription
po::options_description getDescription() override
Get the descriptions for these options.
Definition: Regression.h:235
Belle2::MVA::Weightfile::generateFileName
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:114
Belle2::MVA::Dataset::Dataset
Dataset(const GeneralOptions &general_options)
Constructs a new dataset given the general options.
Definition: Dataset.cc:38
Belle2::MVA::RegressionOptions::save
void save(boost::property_tree::ptree &pt) const override
Save the options to a boost property tree. Only saves the maximal bin number.
Definition: Regression.h:229
Belle2::MVA::RegressionDataSet::loadEvent
void loadEvent(unsigned int iEvent) override
Load an event. Sets all internal variables and sets the isSignal variable dependent on the cut value.
Definition: Regression.cc:23
Belle2::MVA::RegressionOptions::getBaseClassifierOptions
const BaseClassifierOptions & getBaseClassifierOptions() const
Return options of the base classifier (const version)
Definition: Regression.h:217