Belle II Software  release-08-01-10
Regression.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 #pragma once
9 
10 #include <mva/interface/Dataset.h>
11 #include <mva/interface/Expert.h>
12 #include <mva/interface/Options.h>
13 #include <mva/interface/Teacher.h>
14 #include <mva/interface/Weightfile.h>
15 
16 #include <framework/logging/Logger.h>
17 
18 #include <boost/property_tree/ptree_fwd.hpp>
19 #include <boost/program_options.hpp>
20 #include <vector>
21 
22 namespace Belle2 {
27  namespace MVA {
28 
33  template<class BaseClassifierOptions>
35  public:
37  void load(const boost::property_tree::ptree& pt) override;
38 
40  void save(boost::property_tree::ptree& pt) const override;
41 
43  po::options_description getDescription() override;
44 
46  const BaseClassifierOptions& getBaseClassifierOptions() const;
47 
49  BaseClassifierOptions& getBaseClassifierOptions();
50 
52  unsigned int getMaximalBinNumber() const;
53 
55  void setMaximalBinNumber(unsigned int maximalBinNumber);
56 
57  private:
59  BaseClassifierOptions m_baseClassifierOptions;
61  unsigned int m_numberOfBins = 4;
62  };
63 
77  class RegressionDataSet : public Dataset {
78  public:
80  RegressionDataSet(const GeneralOptions& general_options, Dataset* dataSet, double cutValue);
81 
83  unsigned int getNumberOfFeatures() const override;
84 
86  unsigned int getNumberOfEvents() const override;
87 
89  unsigned int getNumberOfSpectators() const override;
90 
92  void loadEvent(unsigned int iEvent) override;
93 
95  std::vector<float> getFeature(unsigned int iFeature) override;
96 
98  std::vector<float> getSpectator(unsigned int iSpectator) override;
99 
101  std::vector<float> getWeights() override;
102 
104  std::vector<float> getTargets() override;
105 
106  private:
108  double m_cutValue;
109 
112  };
113 
138  template<class BaseClassifierTeacher, class RegressionClassifierOptions>
139  class RegressionTeacher : public Teacher {
140  public:
142  RegressionTeacher(const GeneralOptions& general_options,
143  const RegressionClassifierOptions& specific_options);
144 
155  Weightfile train(Dataset& training_data) const override;
156 
157  private:
159  RegressionClassifierOptions m_specific_options;
161  BaseClassifierTeacher m_baseClassifierTeacher;
162  };
163 
178  template<class BaseClassifierExpert, class RegressionClassifierOptions>
179  class RegressionExpert : public Expert {
180  public:
182  void load(Weightfile& weightfile) override;
183 
185  std::vector<float> apply(Dataset& test_data) const override;
186 
187  private:
189  std::vector<BaseClassifierExpert> m_baseClassifierExperts;
190  };
191  }
193 }
194 
195 
196 template<class BaseClassifierOptions>
197 void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::load(const boost::property_tree::ptree& pt)
198 {
199  m_numberOfBins = pt.get<unsigned int>(getMethod() + "_numberOfBins");
200 }
201 
202 template<class BaseClassifierOptions>
204 {
205  return m_numberOfBins;
206 }
207 
208 template<class BaseClassifierOptions>
210 {
211  m_numberOfBins = maximalBinNumber;
212 }
213 
214 template<class BaseClassifierOptions>
216 {
217  return m_baseClassifierOptions;
218 }
219 
220 template<class BaseClassifierOptions>
222 {
223  return m_baseClassifierOptions;
224 }
225 
226 template<class BaseClassifierOptions>
227 void Belle2::MVA::RegressionOptions<BaseClassifierOptions>::save(boost::property_tree::ptree& pt) const
228 {
229  pt.put(getMethod() + "_numberOfBins", m_numberOfBins);
230 }
231 
232 template<class BaseClassifierOptions>
234 {
235  auto description = getBaseClassifierOptions().getDescription();
236 
237  description.add_options()
238  ("numberOfBins", po::value<unsigned int>(&m_numberOfBins),
239  "Number of bins to split the target variable into");
240 
241  return description;
242 }
243 
244 template<class BaseClassifierTeacher, class RegressionClassifierOptions>
246  Belle2::MVA::Dataset& training_data) const
247 {
248  Weightfile weightfile;
249  weightfile.addOptions(m_general_options);
250  weightfile.addOptions(m_specific_options);
251 
252  for (unsigned int binNumber = 1; binNumber < m_specific_options.getMaximalBinNumber(); binNumber++) {
253  RegressionDataSet specificDataSet(m_general_options, &training_data,
254  1.0 / m_specific_options.getMaximalBinNumber() * binNumber);
255  Weightfile specificWeightFile = m_baseClassifierTeacher.train(specificDataSet);
256 
257  std::string file = weightfile.generateFileName();
258  Weightfile::saveToXMLFile(specificWeightFile, file);
259  weightfile.addFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
260  }
261  return weightfile;
262 }
263 
264 template<class BaseClassifierTeacher, class RegressionClassifierOptions>
266  const Belle2::MVA::GeneralOptions& general_options,
267  const RegressionClassifierOptions& specific_options) :
268  Teacher(general_options), m_specific_options(specific_options),
269  m_baseClassifierTeacher(general_options, specific_options.getBaseClassifierOptions())
270 {
271 }
272 
273 template<class BaseClassifierExpert, class RegressionClassifierOptions>
275 {
276  RegressionClassifierOptions specific_options;
277  weightfile.getOptions(specific_options);
278 
279  m_baseClassifierExperts.resize(specific_options.getMaximalBinNumber());
280  for (unsigned int binNumber = 1; binNumber < specific_options.getMaximalBinNumber(); binNumber++) {
281  std::string file = weightfile.generateFileName();
282  weightfile.getFile("BaseClassifier_WeightFile_" + std::to_string(binNumber), file);
283 
284  auto specificWeightFile = Weightfile::loadFromXMLFile(file);
285  m_baseClassifierExperts[binNumber].load(specificWeightFile);
286  }
287 }
288 
289 template<class BaseClassifierExpert, class RegressionClassifierOptions>
291  Dataset& test_data) const
292 {
293  std::vector<float> sum;
294  for (const auto& expert : m_baseClassifierExperts) {
295  if (sum.empty()) {
296  // First time we do not need to add something, but just replace
297  sum = expert.apply(test_data);
298  continue;
299  }
300 
301  const auto& expertResult = expert.apply(test_data);
302  if (sum.size() != expertResult.size()) {
303  B2FATAL("There is one expert in the list that returned not the same number of results than the others!");
304  }
305 
306  for (unsigned int index = 0; index < sum.size(); index++) {
307  sum[index] += expertResult[index];
308  }
309  }
310 
311  for (unsigned int index = 0; index < sum.size(); index++) {
312  sum[index] /= m_baseClassifierExperts.size();
313  }
314 
315  return sum;
316 }
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:33
Abstract base class of all Expert Each MVA library has its own implementation of this class,...
Definition: Expert.h:31
General options which are shared by all MVA trainings.
Definition: Options.h:62
Dataset needed during the training of a regression method.
Definition: Regression.h:77
std::vector< float > getTargets() override
Return the targets from the real dataset.
Definition: Regression.cc:61
unsigned int getNumberOfEvents() const override
Return the number of events from the real dataset.
Definition: Regression.cc:36
std::vector< float > getSpectator(unsigned int iSpectator) override
Return a specific spectator from the real dataset.
Definition: Regression.cc:51
unsigned int getNumberOfSpectators() const override
Return the number of spectators from the real dataset.
Definition: Regression.cc:41
unsigned int getNumberOfFeatures() const override
Return the number of features from the real dataset.
Definition: Regression.cc:31
std::vector< float > getFeature(unsigned int iFeature) override
Return a specific feature from the real dataset.
Definition: Regression.cc:46
std::vector< float > getWeights() override
Return the weights from the real dataset.
Definition: Regression.cc:56
void loadEvent(unsigned int iEvent) override
Load an event. Sets all internal variables and sets the isSignal variable dependent on the cut value.
Definition: Regression.cc:21
Dataset * m_childDataSet
The real data set (our child)
Definition: Regression.h:111
double m_cutValue
The cut value.
Definition: Regression.h:108
RegressionDataSet(const GeneralOptions &general_options, Dataset *dataSet, double cutValue)
Create a new regression data set out of the general options, a pointer to the real dataset and the cu...
Definition: Regression.cc:16
Generic expert for the regression applications.
Definition: Regression.h:179
std::vector< float > apply(Dataset &test_data) const override
Apply the loaded experts by averaging over the single expert decisions.
Definition: Regression.h:290
std::vector< BaseClassifierExpert > m_baseClassifierExperts
The list of single experts.
Definition: Regression.h:189
void load(Weightfile &weightfile) override
Load the expert from the weightfile by loading each stored single classifier one after the other.
Definition: Regression.h:274
Generic options of the Regression MVA methods hosting the number of bins (and the base classifier opt...
Definition: Regression.h:34
BaseClassifierOptions m_baseClassifierOptions
Options of the base classifier.
Definition: Regression.h:59
const BaseClassifierOptions & getBaseClassifierOptions() const
Return options of the base classifier (const version)
Definition: Regression.h:215
unsigned int getMaximalBinNumber() const
Return the number of bins to use.
Definition: Regression.h:203
BaseClassifierOptions & getBaseClassifierOptions()
Return options of the base classifier (non-const version)
Definition: Regression.h:221
unsigned int m_numberOfBins
How many bins to use.
Definition: Regression.h:61
po::options_description getDescription() override
Get the descriptions for these options.
Definition: Regression.h:233
void load(const boost::property_tree::ptree &pt) override
Load the options from a boost property tree. Only loads the maximal bin number.
Definition: Regression.h:197
void setMaximalBinNumber(unsigned int maximalBinNumber)
Set the number of bins to use.
Definition: Regression.h:209
void save(boost::property_tree::ptree &pt) const override
Save the options to a boost property tree. Only saves the maximal bin number.
Definition: Regression.h:227
Core class for the training of regression methods based on binary classifiers.
Definition: Regression.h:139
BaseClassifierTeacher m_baseClassifierTeacher
The teacher to use for training.
Definition: Regression.h:161
RegressionClassifierOptions m_specific_options
The method specific options.
Definition: Regression.h:159
RegressionTeacher(const GeneralOptions &general_options, const RegressionClassifierOptions &specific_options)
Create a new teacher out of the general and the regression method specific options.
Definition: Regression.h:265
Weightfile train(Dataset &training_data) const override
Call the train function.
Definition: Regression.h:245
Specific Options, all method Options have to inherit from this class.
Definition: Options.h:98
Abstract base class of all Teachers Each MVA library has its own implementation of this class,...
Definition: Teacher.h:29
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:115
static Weightfile loadFromXMLFile(const std::string &filename)
Static function which loads a Weightfile from a XML file.
Definition: Weightfile.cc:240
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:62
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:67
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:105
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:138
Abstract base class for different kinds of events.