Belle II Software  release-05-01-25
test_TMVA.cc
1 /* BASF2 (Belle Analysis Framework 2) *
2  * Copyright(C) 2016 - Belle II Collaboration *
3  * *
4  * Author: The Belle II Collaboration *
5  * Contributors: Thomas Keck *
6  * *
7  * This software is provided "as is" without any warranty. *
8  **************************************************************************/
9 
10 #include <mva/methods/TMVA.h>
11 #include <mva/interface/Interface.h>
12 #include <framework/utilities/FileSystem.h>
13 #include <framework/utilities/TestHelpers.h>
14 
15 #include <gtest/gtest.h>
16 
17 using namespace Belle2;
18 
19 namespace {
20 
21  TEST(TMVATest, TMVAOptions)
22  {
23  MVA::TMVAOptions specific_options;
24 
25  //EXPECT_EQ(specific_options.method, "FastBDT");
26  //EXPECT_EQ(specific_options.type, "Plugins");
27  //EXPECT_EQ(specific_options.config, "!H:!V:CreateMVAPdfs:NTrees=400:Shrinkage=0.10:RandRatio=0.5:NCutLevel=8:NTreeLayers=3");
28  EXPECT_EQ(specific_options.m_method, "BDT");
29  EXPECT_EQ(specific_options.m_type, "BDT");
30  EXPECT_EQ(specific_options.m_config,
31  "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining");
32  EXPECT_EQ(specific_options.m_factoryOption, "!V:!Silent:Color:DrawProgressBar");
33  EXPECT_EQ(specific_options.m_prepareOption, "SplitMode=random:!V");
34  EXPECT_EQ(specific_options.m_workingDirectory, "");
35  EXPECT_EQ(specific_options.m_prefix, "TMVA");
36 
37  specific_options.m_method = "Method";
38  specific_options.m_type = "Type";
39  specific_options.m_config = "Config";
40  specific_options.m_factoryOption = "FactoryOption";
41  specific_options.m_prepareOption = "PrepareOption";
42  specific_options.m_workingDirectory = "WorkingDirectory";
43  specific_options.m_prefix = "Prefix";
44 
45  boost::property_tree::ptree pt;
46  specific_options.save(pt);
47  EXPECT_EQ(pt.get<std::string>("TMVA_method"), "Method");
48  EXPECT_EQ(pt.get<std::string>("TMVA_type"), "Type");
49  EXPECT_EQ(pt.get<std::string>("TMVA_config"), "Config");
50  EXPECT_EQ(pt.get<std::string>("TMVA_factoryOption"), "FactoryOption");
51  EXPECT_EQ(pt.get<std::string>("TMVA_prepareOption"), "PrepareOption");
52  EXPECT_EQ(pt.get<std::string>("TMVA_workingDirectory"), "WorkingDirectory");
53  EXPECT_EQ(pt.get<std::string>("TMVA_prefix"), "Prefix");
54 
55  MVA::TMVAOptions specific_options2;
56  specific_options2.load(pt);
57 
58  EXPECT_EQ(specific_options2.m_method, "Method");
59  EXPECT_EQ(specific_options2.m_type, "Type");
60  EXPECT_EQ(specific_options2.m_config, "Config");
61  EXPECT_EQ(specific_options2.m_factoryOption, "FactoryOption");
62  EXPECT_EQ(specific_options2.m_prepareOption, "PrepareOption");
63  EXPECT_EQ(specific_options2.m_workingDirectory, "WorkingDirectory");
64  EXPECT_EQ(specific_options2.m_prefix, "Prefix");
65 
66  MVA::TMVAOptionsClassification specific_classification_options;
67  EXPECT_EQ(specific_classification_options.transform2probability, true);
68  EXPECT_EQ(specific_classification_options.m_factoryOption, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");
69 
70  specific_classification_options.transform2probability = false;
71  boost::property_tree::ptree pt_classification;
72  specific_classification_options.save(pt_classification);
73  EXPECT_EQ(pt_classification.get<bool>("TMVA_transform2probability"), false);
74 
75  MVA::TMVAOptionsClassification specific_classification_options2;
76  specific_classification_options2.load(pt_classification);
77  EXPECT_EQ(specific_classification_options.transform2probability, false);
78 
79  MVA::TMVAOptionsRegression specific_regression_options;
80  EXPECT_EQ(specific_regression_options.m_factoryOption, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression");
81 
82  EXPECT_EQ(specific_classification_options.getMethod(), std::string("TMVAClassification"));
83  EXPECT_EQ(specific_regression_options.getMethod(), std::string("TMVARegression"));
84 
85  // Test if po::options_description is created without crashing
86  auto description = specific_options.getDescription();
87  EXPECT_EQ(description.options().size(), 6);
88 
89  auto description_reg = specific_regression_options.getDescription();
90  EXPECT_EQ(description_reg.options().size(), 6);
91 
92  auto description_cls = specific_classification_options.getDescription();
93  EXPECT_EQ(description_cls.options().size(), 7);
94 
95  // Check for B2ERROR and throw if version is wrong
96  // we try with version 100, surely we will never reach this!
97  pt.put("TMVA_version", 100);
98  try {
99  EXPECT_B2ERROR(specific_options2.load(pt));
100  } catch (...) {
101 
102  }
103  EXPECT_THROW(specific_options2.load(pt), std::runtime_error);
104 
105  }
106 
107  class TestClassificationDataset : public MVA::Dataset {
108  public:
109  explicit TestClassificationDataset(const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
110  {
111  m_input = {0.0};
112  m_target = 0.0;
113  m_isSignal = false;
114  m_weight = 1.0;
115  }
116 
117  [[nodiscard]] unsigned int getNumberOfFeatures() const override { return 1; }
118  [[nodiscard]] unsigned int getNumberOfSpectators() const override { return 0; }
119  [[nodiscard]] unsigned int getNumberOfEvents() const override { return m_data.size(); }
120  void loadEvent(unsigned int iEvent) override { m_input[0] = m_data[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
121  float getSignalFraction() override { return 0.1; };
122  std::vector<float> getFeature(unsigned int) override { return m_data; }
123 
124  std::vector<float> m_data;
125 
126  };
127 
128 
129  TEST(TMVATest, TMVAClassificationInterface)
130  {
132  interface;
133 
134  MVA::GeneralOptions general_options;
135  general_options.m_variables = {"A"};
136  general_options.m_target_variable = "Target";
137  MVA::TMVAOptionsClassification specific_options;
138  specific_options.m_prepareOption = "SplitMode=block:!V";
139  specific_options.transform2probability = false;
140  specific_options.m_config =
141  "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:nCuts=10:MaxDepth=3:IgnoreNegWeightsInTraining:MinNodeSize=20";
142  TestClassificationDataset dataset({1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
143  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
144  2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
145  2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
146  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
147  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
148  2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
149  2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0
150  });
151 
152  auto teacher = interface.getTeacher(general_options, specific_options);
153  auto weightfile = teacher->train(dataset);
154 
155  auto expert = interface.getExpert();
156  expert->load(weightfile);
157  auto probabilities = expert->apply(dataset);
158  EXPECT_EQ(probabilities.size(), dataset.getNumberOfEvents());
159  for (unsigned int i = 0; i < 24; ++i) {
160  EXPECT_LE(probabilities[i], 0.1);
161  EXPECT_GE(probabilities[i], -0.1);
162  }
163  for (unsigned int i = 24; i < 48; i += 2) {
164  EXPECT_LE(probabilities[i], -0.8);
165  EXPECT_GE(probabilities[i + 1], 0.8);
166  }
167  for (unsigned int i = 48; i < 72; ++i) {
168  EXPECT_LE(probabilities[i], 0.1);
169  EXPECT_GE(probabilities[i], -0.1);
170  }
171  for (unsigned int i = 72; i < 96; i += 2) {
172  EXPECT_LE(probabilities[i], -0.8);
173  EXPECT_GE(probabilities[i + 1], 0.8);
174  }
175 
176 
177  }
178 
179 
180  class TestRegressionDataset : public MVA::Dataset {
181  public:
182  explicit TestRegressionDataset(const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
183  {
184  m_input = {0.0};
185  m_target = 0.0;
186  m_isSignal = false;
187  m_weight = 1.0;
188  }
189 
190  [[nodiscard]] unsigned int getNumberOfFeatures() const override { return 1; }
191  [[nodiscard]] unsigned int getNumberOfSpectators() const override { return 0; }
192  [[nodiscard]] unsigned int getNumberOfEvents() const override { return m_data.size(); }
193  void loadEvent(unsigned int iEvent) override { m_input[0] = m_data[iEvent]; m_target = static_cast<float>((static_cast<int>(iEvent % 48) - 24) / 4) / 24.0;};
194  float getSignalFraction() override { return 0.0; };
195  std::vector<float> getFeature(unsigned int) override { return m_data; }
196 
197  std::vector<float> m_data;
198 
199  };
200 
201  TEST(TMVATest, TMVARegressionInterface)
202  {
204 
205  MVA::GeneralOptions general_options;
206  general_options.m_variables = {"A"};
207  general_options.m_target_variable = "Target";
208  MVA::TMVAOptionsRegression specific_options;
209  specific_options.m_prepareOption = "SplitMode=block:!V";
210  specific_options.m_config = "!H:!V:NTrees=200::BoostType=Grad:Shrinkage=0.1:nCuts=24:MaxDepth=3";
211  //specific_options.config = "nCuts=120:NTrees=20:MaxDepth=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:MinNodeSize=10";
212  TestRegressionDataset dataset({1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
213  4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
214  7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
215  10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0,
216  1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
217  4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
218  7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
219  10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0
220  });
221 
222  auto teacher = interface.getTeacher(general_options, specific_options);
223  auto weightfile = teacher->train(dataset);
224 
225  auto expert = interface.getExpert();
226  expert->load(weightfile);
227  auto values = expert->apply(dataset);
228  EXPECT_EQ(values.size(), dataset.getNumberOfEvents());
229  for (unsigned int i = 0; i < 96; i += 4) {
230  float r = static_cast<float>((static_cast<int>(i % 48) - 24) / 4) / 24.0;
231  EXPECT_LE(values[i], r + 0.05);
232  EXPECT_GE(values[i], r - 0.05);
233  EXPECT_LE(values[i + 1], r + 0.05);
234  EXPECT_GE(values[i + 1], r - 0.05);
235  EXPECT_LE(values[i + 2], r + 0.05);
236  EXPECT_GE(values[i + 2], r - 0.05);
237  EXPECT_LE(values[i + 3], r + 0.05);
238  EXPECT_GE(values[i + 3], r - 0.05);
239  }
240 
241  }
242 
243  TEST(TMVATest, WeightfilesAreReadCorrectly)
244  {
246  interface;
247 
248  MVA::GeneralOptions general_options;
249  general_options.m_variables = {"M", "p", "pt"};
250  MVA::MultiDataset dataset(general_options, {{1.835127, 1.179507, 1.164944},
251  {1.873689, 1.881940, 1.843310},
252  {1.863657, 1.774831, 1.753773},
253  {1.858293, 1.605311, 0.631336},
254  {1.837129, 1.575739, 1.490166},
255  {1.811395, 1.524029, 0.565220}
256  },
257  {}, {0.0, 1.0, 0.0, 1.0, 0.0, 1.0});
258 
259  auto expert = interface.getExpert();
260 
261  auto weightfile = MVA::Weightfile::loadFromFile(FileSystem::findFile("mva/methods/tests/TMVA.xml"));
262  expert->load(weightfile);
263  auto probabilities = expert->apply(dataset);
264  EXPECT_NEAR(probabilities[0], 0.098980136215686798, 0.0001);
265  EXPECT_NEAR(probabilities[1], 0.35516414046287537, 0.0001);
266  EXPECT_NEAR(probabilities[2], 0.066082566976547241, 0.0001);
267  EXPECT_NEAR(probabilities[3], 0.18826344609260559, 0.0001);
268  EXPECT_NEAR(probabilities[4], 0.10691597312688828, 0.0001);
269  EXPECT_NEAR(probabilities[5], 1.4245844629813542e-13, 0.0001);
270  }
271 
272 }
Belle2::MVA::TMVAOptions::m_config
std::string m_config
TMVA config string for the chosen method.
Definition: TMVA.h:70
Belle2::MVA::MultiDataset
Wraps the data of a multiple event into a Dataset.
Definition: Dataset.h:187
Belle2::MVA::TMVAOptions::m_prepareOption
std::string m_prepareOption
Prepare options passed to prepareTrainingAndTestTree method.
Definition: TMVA.h:76
Belle2::MVA::TMVAOptionsClassification::load
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from a xml tree.
Definition: TMVA.cc:79
Belle2::MVA::TMVAOptionsClassification::save
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism to store Options in a xml tree.
Definition: TMVA.cc:85
Belle2::MVA::Dataset
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:34
Belle2::MVA::TMVAOptionsClassification
Options for the TMVA Classification MVA method.
Definition: TMVA.h:84
Belle2::MVA::TMVAOptionsRegression::getMethod
virtual std::string getMethod() const override
Return method name.
Definition: TMVA.h:185
Belle2::MVA::TMVAOptionsClassification::transform2probability
bool transform2probability
Transform output of method to a probability.
Definition: TMVA.h:119
Belle2::MVA::TMVAOptions::getDescription
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: TMVA.cc:65
Belle2::MVA::TMVAOptionsClassification::getMethod
virtual std::string getMethod() const override
Return method name.
Definition: TMVA.h:116
Belle2::MVA::Interface::getTeacher
virtual std::unique_ptr< Teacher > getTeacher(const GeneralOptions &general_options, const SpecificOptions &specific_options) const override
Get Teacher of this MVA library.
Definition: Interface.h:119
Belle2::MVA::TMVAOptions::m_prefix
std::string m_prefix
Prefix used for all files generated by TMVA.
Definition: TMVA.h:78
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::Weightfile::loadFromFile
static Weightfile loadFromFile(const std::string &filename)
Static function which loads a Weightfile from a file.
Definition: Weightfile.cc:215
Belle2::MVA::GeneralOptions
General options which are shared by all MVA trainings.
Definition: Options.h:64
Belle2::MVA::TMVAOptions::m_method
std::string m_method
tmva method name
Definition: TMVA.h:64
Belle2::TEST
TEST(TestgetDetectorRegion, TestgetDetectorRegion)
Test Constructors.
Definition: utilityFunctions.cc:18
Belle2::MVA::TMVAOptions::m_type
std::string m_type
tmva method type
Definition: TMVA.h:65
Belle2::MVA::TMVAOptionsClassification::getDescription
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: TMVA.cc:91
Belle2::MVA::TMVAOptions::m_factoryOption
std::string m_factoryOption
Factory options passed to tmva factory.
Definition: TMVA.h:75
Belle2::MVA::TMVAOptionsRegression
Options for the TMVA Regression MVA method.
Definition: TMVA.h:170
Belle2::FileSystem::findFile
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:147
Belle2::MVA::Interface::getExpert
virtual std::unique_ptr< MVA::Expert > getExpert() const override
Get Expert of this MVA library.
Definition: Interface.h:128
Belle2::MVA::Interface
Template class to easily construct an interface for an MVA library using a library-specific Options,...
Definition: Interface.h:101
Belle2::MVA::TMVAOptions::m_workingDirectory
std::string m_workingDirectory
Working directory of TMVA, if empty a temporary directory is used.
Definition: TMVA.h:77
Belle2::MVA::TMVAOptions
Options for the TMVA MVA method.
Definition: TMVA.h:38
Belle2::MVA::TMVAOptions::load
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from a xml tree.
Definition: TMVA.cc:37