10 #include <mva/methods/TMVA.h>
11 #include <mva/interface/Interface.h>
12 #include <framework/utilities/FileSystem.h>
13 #include <framework/utilities/TestHelpers.h>
15 #include <gtest/gtest.h>
// Test of the TMVA option objects: default member values, saving to and loading from a
// boost::property_tree::ptree, the classification/regression variants, the size of the option
// descriptions, and the behaviour of load() after TMVA_version has been overwritten.
// NOTE(review): the declarations of specific_options, specific_options2,
// specific_classification_options(2) and specific_regression_options are not visible in this
// excerpt — presumably they sit on lines that are not shown; confirm before editing.
21 TEST(TMVATest, TMVAOptions)
// Member values expected on specific_options before any assignment is made to it.
28 EXPECT_EQ(specific_options.m_method,
"BDT");
29 EXPECT_EQ(specific_options.m_type,
"BDT");
30 EXPECT_EQ(specific_options.m_config,
31 "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining");
32 EXPECT_EQ(specific_options.m_factoryOption,
"!V:!Silent:Color:DrawProgressBar");
33 EXPECT_EQ(specific_options.m_prepareOption,
"SplitMode=random:!V");
34 EXPECT_EQ(specific_options.m_workingDirectory,
"");
35 EXPECT_EQ(specific_options.m_prefix,
"TMVA");
// Overwrite every member with a distinct marker string so that the save/load checks below
// cannot pass by accident on a default value.
37 specific_options.m_method =
"Method";
38 specific_options.m_type =
"Type";
39 specific_options.m_config =
"Config";
40 specific_options.m_factoryOption =
"FactoryOption";
41 specific_options.m_prepareOption =
"PrepareOption";
42 specific_options.m_workingDirectory =
"WorkingDirectory";
43 specific_options.m_prefix =
"Prefix";
// save() is expected to write each member into the tree under a "TMVA_"-prefixed key.
45 boost::property_tree::ptree pt;
46 specific_options.save(pt);
47 EXPECT_EQ(pt.get<std::string>(
"TMVA_method"),
"Method");
48 EXPECT_EQ(pt.get<std::string>(
"TMVA_type"),
"Type");
49 EXPECT_EQ(pt.get<std::string>(
"TMVA_config"),
"Config");
50 EXPECT_EQ(pt.get<std::string>(
"TMVA_factoryOption"),
"FactoryOption");
51 EXPECT_EQ(pt.get<std::string>(
"TMVA_prepareOption"),
"PrepareOption");
52 EXPECT_EQ(pt.get<std::string>(
"TMVA_workingDirectory"),
"WorkingDirectory");
53 EXPECT_EQ(pt.get<std::string>(
"TMVA_prefix"),
"Prefix");
// Round trip: a second options object loaded from the same tree must carry the marker values.
56 specific_options2.
load(pt);
58 EXPECT_EQ(specific_options2.
m_method,
"Method");
59 EXPECT_EQ(specific_options2.
m_type,
"Type");
60 EXPECT_EQ(specific_options2.
m_config,
"Config");
64 EXPECT_EQ(specific_options2.
m_prefix,
"Prefix");
// The classification variant is expected to append AnalysisType=Classification to the factory options.
68 EXPECT_EQ(specific_classification_options.
m_factoryOption,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");
// The classification variant additionally saves a boolean "TMVA_transform2probability" entry,
// expected to be false here.
71 boost::property_tree::ptree pt_classification;
72 specific_classification_options.
save(pt_classification);
73 EXPECT_EQ(pt_classification.get<
bool>(
"TMVA_transform2probability"),
false);
76 specific_classification_options2.
load(pt_classification);
// The regression variant is expected to append AnalysisType=Regression to the factory options.
80 EXPECT_EQ(specific_regression_options.
m_factoryOption,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression");
// Method names reported by the two specialised option classes.
82 EXPECT_EQ(specific_classification_options.
getMethod(), std::string(
"TMVAClassification"));
83 EXPECT_EQ(specific_regression_options.
getMethod(), std::string(
"TMVARegression"));
// Number of entries in each option description: 6 for the base and regression options,
// 7 for the classification options.
86 auto description = specific_options.getDescription();
87 EXPECT_EQ(description.options().size(), 6);
89 auto description_reg = specific_regression_options.
getDescription();
90 EXPECT_EQ(description_reg.options().size(), 6);
92 auto description_cls = specific_classification_options.
getDescription();
93 EXPECT_EQ(description_cls.options().size(), 7);
// After TMVA_version is overwritten with 100, load() is expected to report a B2ERROR.
97 pt.put(
"TMVA_version", 100);
99 EXPECT_B2ERROR(specific_options2.
load(pt));
// load() is also expected to throw std::runtime_error.
// NOTE(review): any statements between the two load() checks are not visible in this excerpt.
103 EXPECT_THROW(specific_options2.
load(pt), std::runtime_error);
// Constructor: initialises the MVA::Dataset base with default-constructed MVA::GeneralOptions
// and copies the given values into m_data.
// NOTE(review): the constructor body is not visible in this excerpt.
109 explicit TestClassificationDataset(
const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
// Number of features per event; this test dataset always reports exactly one.
117 [[nodiscard]]
unsigned int getNumberOfFeatures()
const override {
return 1; }
// Number of spectators per event; this test dataset always reports zero.
118 [[nodiscard]]
unsigned int getNumberOfSpectators()
const override {
return 0; }
// Number of events, i.e. the number of values stored in m_data
// (the std::vector size is implicitly converted to unsigned int).
119 [[nodiscard]]
unsigned int getNumberOfEvents()
const override {
return m_data.size(); }
// Loads event iEvent: the single feature is m_data[iEvent], the target is iEvent % 2, and the
// event is flagged as signal when that target equals 1 (i.e. for odd iEvent).
// iEvent is used to index m_data without a bounds check.
120 void loadEvent(
unsigned int iEvent)
override { m_input[0] = m_data[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
// Returns a fixed signal fraction of 0.1.
// NOTE(review): loadEvent() above marks every second event as signal, so this constant is not
// derived from the stored data — presumably deliberate for the test; confirm.
121 float getSignalFraction()
override {
return 0.1; };
// Returns a copy of all stored values. The feature-index argument is unnamed and ignored,
// which matches getNumberOfFeatures() reporting a single feature.
122 std::vector<float> getFeature(
unsigned int)
override {
return m_data; }
// Feature values of the test dataset, one entry per event (copied in by the constructor).
124 std::vector<float> m_data;
// Trains a TMVA classifier on a 96-event, one-feature test dataset via the teacher obtained
// from the interface, loads the resulting weightfile into an expert, applies it to the same
// dataset and checks the returned values per block of events.
// NOTE(review): the declarations of interface, general_options, specific_options and expert are
// not visible in this excerpt — presumably on lines that are not shown; confirm.
129 TEST(TMVATest, TMVAClassificationInterface)
135 general_options.m_variables = {
"A"};
136 general_options.m_target_variable =
"Target";
// Block splitting instead of the random default checked in the options test.
138 specific_options.m_prepareOption =
"SplitMode=block:!V";
// Unlike the other members assigned here, this one is spelled without the m_ prefix.
139 specific_options.transform2probability =
false;
140 specific_options.m_config =
141 "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:nCuts=10:MaxDepth=3:IgnoreNegWeightsInTraining:MinNodeSize=20";
// 96 feature values laid out as 24 x 1.0, 24 x alternating 2.0/3.0, and the same 48 values again.
142 TestClassificationDataset dataset({1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
143 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
144 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
145 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
146 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
147 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
148 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
149 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0
// Train, then load the produced weightfile into the expert and apply it to the training data.
152 auto teacher = interface.
getTeacher(general_options, specific_options);
153 auto weightfile = teacher->train(dataset);
156 expert->load(weightfile);
157 auto probabilities = expert->apply(dataset);
// One output value per event.
158 EXPECT_EQ(probabilities.size(), dataset.getNumberOfEvents());
// Events 0..23 all share the feature value 1.0: the output must lie within [-0.1, 0.1].
159 for (
unsigned int i = 0; i < 24; ++i) {
160 EXPECT_LE(probabilities[i], 0.1);
161 EXPECT_GE(probabilities[i], -0.1);
// Events 24..47 alternate 2.0/3.0: even indices (2.0) must be <= -0.8, odd indices (3.0) >= 0.8.
163 for (
unsigned int i = 24; i < 48; i += 2) {
164 EXPECT_LE(probabilities[i], -0.8);
165 EXPECT_GE(probabilities[i + 1], 0.8);
// Events 48..71 repeat the constant-feature block: same [-0.1, 0.1] window.
167 for (
unsigned int i = 48; i < 72; ++i) {
168 EXPECT_LE(probabilities[i], 0.1);
169 EXPECT_GE(probabilities[i], -0.1);
// Events 72..95 repeat the alternating block: same -0.8 / 0.8 thresholds.
171 for (
unsigned int i = 72; i < 96; i += 2) {
172 EXPECT_LE(probabilities[i], -0.8);
173 EXPECT_GE(probabilities[i + 1], 0.8);
// Constructor: initialises the MVA::Dataset base with default-constructed MVA::GeneralOptions
// and copies the given values into m_data.
// NOTE(review): the constructor body is not visible in this excerpt.
182 explicit TestRegressionDataset(
const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
// Number of features per event; this test dataset always reports exactly one.
190 [[nodiscard]]
unsigned int getNumberOfFeatures()
const override {
return 1; }
// Number of spectators per event; this test dataset always reports zero.
191 [[nodiscard]]
unsigned int getNumberOfSpectators()
const override {
return 0; }
// Number of events, i.e. the number of values stored in m_data
// (the std::vector size is implicitly converted to unsigned int).
192 [[nodiscard]]
unsigned int getNumberOfEvents()
const override {
return m_data.size(); }
// Loads event iEvent: the single feature is m_data[iEvent] and the regression target is
// ((iEvent % 48) - 24) / 4 evaluated in integer arithmetic, then divided by 24.0, which gives
// values from -6/24 up to 5/24 that repeat every 48 events.
// iEvent is used to index m_data without a bounds check.
// NOTE(review): integer division truncates toward zero, so while (iEvent % 48) < 24 the
// quotient is not constant over an aligned group of four events (iEvent 0 gives -6, iEvent
// 1..3 give -5); for (iEvent % 48) >= 24 it is constant per group. Confirm this is intended.
193 void loadEvent(
unsigned int iEvent)
override { m_input[0] = m_data[iEvent]; m_target =
static_cast<float>((
static_cast<int>(iEvent % 48) - 24) / 4) / 24.0;};
// Returns a fixed signal fraction of 0.0 for the regression test dataset.
194 float getSignalFraction()
override {
return 0.0; };
// Returns a copy of all stored values. The feature-index argument is unnamed and ignored,
// which matches getNumberOfFeatures() reporting a single feature.
195 std::vector<float> getFeature(
unsigned int)
override {
return m_data; }
// Feature values of the test dataset, one entry per event (copied in by the constructor).
197 std::vector<float> m_data;
// Trains a TMVA regression on a 96-event, one-feature test dataset via the teacher obtained
// from the interface, loads the resulting weightfile into an expert, applies it to the same
// dataset and checks the predictions in groups of four events.
// NOTE(review): the declarations of interface, general_options, specific_options and expert are
// not visible in this excerpt — presumably on lines that are not shown; confirm.
201 TEST(TMVATest, TMVARegressionInterface)
206 general_options.m_variables = {
"A"};
207 general_options.m_target_variable =
"Target";
209 specific_options.m_prepareOption =
"SplitMode=block:!V";
// NOTE(review): the option string contains a doubled ':' after NTrees=200 — looks like a typo;
// confirm that the option parser tolerates the empty entry before changing it.
210 specific_options.m_config =
"!H:!V:NTrees=200::BoostType=Grad:Shrinkage=0.1:nCuts=24:MaxDepth=3";
// 96 feature values: 1.0 .. 12.0, each repeated four times in a row, and the same 48 values again.
212 TestRegressionDataset dataset({1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
213 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
214 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
215 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0,
216 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
217 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
218 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
219 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0
// Train, then load the produced weightfile into the expert and apply it to the training data.
222 auto teacher = interface.
getTeacher(general_options, specific_options);
223 auto weightfile = teacher->train(dataset);
226 expert->load(weightfile);
227 auto values = expert->apply(dataset);
// One output value per event.
228 EXPECT_EQ(values.size(), dataset.getNumberOfEvents());
// Walk the events in groups of four. r is the reference value computed for the first index of
// the group with the expression ((i % 48) - 24) / 4 (integer division) divided by 24.0; each of
// the four predictions in the group must lie within +-0.05 of r.
229 for (
unsigned int i = 0; i < 96; i += 4) {
230 float r =
static_cast<float>((
static_cast<int>(i % 48) - 24) / 4) / 24.0;
231 EXPECT_LE(values[i], r + 0.05);
232 EXPECT_GE(values[i], r - 0.05);
233 EXPECT_LE(values[i + 1], r + 0.05);
234 EXPECT_GE(values[i + 1], r - 0.05);
235 EXPECT_LE(values[i + 2], r + 0.05);
236 EXPECT_GE(values[i + 2], r - 0.05);
237 EXPECT_LE(values[i + 3], r + 0.05);
238 EXPECT_GE(values[i + 3], r - 0.05);
// Loads a weightfile into an expert, applies it to a small dataset with the variables M, p and
// pt, and compares the first six outputs with fixed reference numbers.
// NOTE(review): the construction of dataset, weightfile and expert is not visible in this
// excerpt, and the end of the test may lie beyond the last visible line — confirm.
243 TEST(TMVATest, WeightfilesAreReadCorrectly)
249 general_options.m_variables = {
"M",
"p",
"pt"};
// Rows of three numbers each, matching the three variable names above.
// NOTE(review): only five rows are visible although six outputs are checked below —
// presumably the first row sits on a line that is not shown.
251 {1.873689, 1.881940, 1.843310},
252 {1.863657, 1.774831, 1.753773},
253 {1.858293, 1.605311, 0.631336},
254 {1.837129, 1.575739, 1.490166},
255 {1.811395, 1.524029, 0.565220}
257 {}, {0.0, 1.0, 0.0, 1.0, 0.0, 1.0});
262 expert->load(weightfile);
263 auto probabilities = expert->apply(dataset);
// Each output must agree with its reference number within an absolute tolerance of 0.0001.
264 EXPECT_NEAR(probabilities[0], 0.098980136215686798, 0.0001);
265 EXPECT_NEAR(probabilities[1], 0.35516414046287537, 0.0001);
266 EXPECT_NEAR(probabilities[2], 0.066082566976547241, 0.0001);
267 EXPECT_NEAR(probabilities[3], 0.18826344609260559, 0.0001);
268 EXPECT_NEAR(probabilities[4], 0.10691597312688828, 0.0001);
// With a reference of about 1.4e-13 and this tolerance, any output within 0.0001 of zero passes.
269 EXPECT_NEAR(probabilities[5], 1.4245844629813542e-13, 0.0001);