9#include <mva/methods/FastBDT.h>
10#include <mva/interface/Interface.h>
11#include <mva/interface/Dataset.h>
12#include <framework/utilities/FileSystem.h>
13#include <framework/utilities/TestHelpers.h>
15#include <gtest/gtest.h>
// Exercises the FastBDT options object: initial values, a save() round trip
// through a boost property tree, load() into a second options object, the
// reported method name and description, and load() with version 100.
// NOTE(review): this listing omits some original lines; braces and the
// declarations of specific_options / specific_options2 are not visible here.
21 TEST(FastBDTTest, FastBDTOptions)
// Values expected before anything is modified (presumably the defaults of a
// freshly constructed options object -- confirm against the declaration).
25 EXPECT_EQ(specific_options.m_nTrees, 200);
26 EXPECT_EQ(specific_options.m_nCuts, 8);
27 EXPECT_EQ(specific_options.m_nLevels, 3);
28 EXPECT_FLOAT_EQ(specific_options.m_shrinkage, 0.1);
29 EXPECT_FLOAT_EQ(specific_options.m_randRatio, 0.5);
30 EXPECT_EQ(specific_options.m_sPlot,
false);
31 EXPECT_EQ(specific_options.m_individual_nCuts.size(), 0);
32 EXPECT_EQ(specific_options.m_individualPurityTransformation.size(), 0);
33 EXPECT_EQ(specific_options.m_purityTransformation,
false);
34 EXPECT_FLOAT_EQ(specific_options.m_flatnessLoss, -1.0);
// Overwrite each option with a value that differs from the one checked above,
// so the save/load checks below cannot pass by accident.
36 specific_options.m_nTrees = 100;
37 specific_options.m_nCuts = 10;
38 specific_options.m_nLevels = 2;
39 specific_options.m_shrinkage = 0.2;
40 specific_options.m_randRatio = 0.8;
41 specific_options.m_individual_nCuts = {2, 3, 4};
42 specific_options.m_flatnessLoss = 0.3;
43 specific_options.m_sPlot =
true;
44 specific_options.m_purityTransformation =
true;
45 specific_options.m_individualPurityTransformation = {
true,
false,
true};
// Serialize into a property tree and check the stored keys. Vector-valued
// options are expected as a "FastBDT_number_<name>" count plus one
// "FastBDT_<name><index>" entry per element.
47 boost::property_tree::ptree pt;
48 specific_options.save(pt);
49 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_nTrees"), 100);
50 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_nCuts"), 10);
51 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_nLevels"), 2);
52 EXPECT_FLOAT_EQ(pt.get<
double>(
"FastBDT_shrinkage"), 0.2);
53 EXPECT_FLOAT_EQ(pt.get<
double>(
"FastBDT_randRatio"), 0.8);
54 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_number_individual_nCuts"), 3);
55 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_individual_nCuts0"), 2);
56 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_individual_nCuts1"), 3);
57 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_individual_nCuts2"), 4);
58 EXPECT_EQ(pt.get<
bool>(
"FastBDT_sPlot"),
true);
59 EXPECT_FLOAT_EQ(pt.get<
double>(
"FastBDT_flatnessLoss"), 0.3);
60 EXPECT_EQ(pt.get<
bool>(
"FastBDT_purityTransformation"),
true);
61 EXPECT_EQ(pt.get<
unsigned int>(
"FastBDT_number_individualPurityTransformation"), 3);
62 EXPECT_EQ(pt.get<
bool>(
"FastBDT_individualPurityTransformation0"),
true);
63 EXPECT_EQ(pt.get<
bool>(
"FastBDT_individualPurityTransformation1"),
false);
64 EXPECT_EQ(pt.get<
bool>(
"FastBDT_individualPurityTransformation2"),
true);
// Load the saved tree into a second options object and compare the restored
// fields with the values set above.
67 specific_options2.
load(pt);
69 EXPECT_EQ(specific_options2.
m_nTrees, 100);
70 EXPECT_EQ(specific_options2.
m_nCuts, 10);
71 EXPECT_EQ(specific_options2.
m_nLevels, 2);
72 EXPECT_FLOAT_EQ(specific_options2.
m_shrinkage, 0.2);
73 EXPECT_FLOAT_EQ(specific_options2.
m_randRatio, 0.8);
74 EXPECT_EQ(specific_options2.
m_sPlot,
true);
// The options must identify their method as "FastBDT".
86 EXPECT_EQ(specific_options.getMethod(), std::string(
"FastBDT"));
// The option description is expected to expose exactly 10 options.
89 auto description = specific_options.getDescription();
91 EXPECT_EQ(description.options().size(), 10);
// Store version 100 in the tree; load() is then expected to emit a B2ERROR
// and to throw std::runtime_error (presumably because that version is not
// supported -- confirm against FastBDTOptions::load).
95 pt.put(
"FastBDT_version", 100);
97 EXPECT_B2ERROR(specific_options2.
load(pt));
101 EXPECT_THROW(specific_options2.
load(pt), std::runtime_error);
// Members of the TestDataset helper, a one-feature MVA::Dataset backed by an
// in-memory vector. NOTE(review): the class header and the constructor body
// are not visible in this listing.
//
// Constructor: passes a default-constructed MVA::GeneralOptions to the
// MVA::Dataset base and copies `data` into m_data.
106 explicit TestDataset(
const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
// Number of events equals the number of stored values.
116 [[nodiscard]]
unsigned int getNumberOfEvents()
const override {
return m_data.size(); }
// Loads event iEvent: the stored value becomes input feature 0, the target is
// the parity of the event index (0 for even, 1 for odd), and the event counts
// as signal exactly when the target is 1.
117 void loadEvent(
unsigned int iEvent)
override { m_input[0] = m_data[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
// Returns the whole data vector; the feature index argument is ignored.
119 std::vector<float>
getFeature(
unsigned int)
override {
return m_data; }
// Feature values, one per event.
121 std::vector<float> m_data;
// Trains a FastBDT on a tiny one-feature dataset through the MVA interface,
// loads the resulting weightfile into an expert and checks the predicted
// probabilities.
// NOTE(review): this listing omits some original lines (declarations of
// interface, general_options, specific_options, expert, and closing braces).
126 TEST(FastBDTTest, FastBDTInterface)
// Single input variable named "A".
131 general_options.m_variables = {
"A"};
// randRatio is documented as the fraction of data used in the stochastic
// training; 1.0 presumably makes the training use all eight events.
133 specific_options.m_randRatio = 1.0;
// TestDataset assigns target = event index % 2, so the four events with value
// 1.0 carry targets 0,1,0,1, the events with value 2.0 (indices 4 and 6)
// carry target 0 and the events with value 3.0 (indices 5 and 7) target 1.
134 TestDataset dataset({1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0});
136 auto teacher = interface.
getTeacher(general_options, specific_options);
137 auto weightfile = teacher->train(dataset);
140 expert->load(weightfile);
141 auto probabilities = expert->apply(dataset);
// One probability per event.
142 EXPECT_EQ(probabilities.size(), dataset.getNumberOfEvents());
// Value 1.0 appears equally often with both targets, so the prediction is
// expected to stay within [0.4, 0.6].
143 for (
unsigned int i = 0; i < 4; ++i) {
144 EXPECT_LE(probabilities[i], 0.6);
145 EXPECT_GE(probabilities[i], 0.4);
// Value 2.0 only occurs with target 0 (expect <= 0.2), value 3.0 only with
// target 1 (expect >= 0.8).
147 EXPECT_LE(probabilities[4], 0.2);
148 EXPECT_GE(probabilities[5], 0.8);
149 EXPECT_LE(probabilities[6], 0.2);
150 EXPECT_GE(probabilities[7], 0.8);
// Same train/load/apply scenario as the plain FastBDTInterface test, but with
// the global purity transformation switched on; the same probability bounds
// are expected.
// NOTE(review): this listing omits some original lines (declarations of
// interface, general_options, specific_options, expert, and closing braces).
154 TEST(FastBDTTest, FastBDTInterfaceWithPurityTransformation)
// Single input variable named "A".
159 general_options.m_variables = {
"A"};
161 specific_options.m_randRatio = 1.0;
// The only option that differs from the plain interface test.
162 specific_options.m_purityTransformation =
true;
// TestDataset assigns target = event index % 2: value 1.0 is seen with both
// targets, value 2.0 only with target 0, value 3.0 only with target 1.
163 TestDataset dataset({1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0});
165 auto teacher = interface.
getTeacher(general_options, specific_options);
166 auto weightfile = teacher->train(dataset);
169 expert->load(weightfile);
170 auto probabilities = expert->apply(dataset);
// One probability per event.
171 EXPECT_EQ(probabilities.size(), dataset.getNumberOfEvents());
// Ambiguous events (value 1.0) are expected within [0.4, 0.6].
172 for (
unsigned int i = 0; i < 4; ++i) {
173 EXPECT_LE(probabilities[i], 0.6);
174 EXPECT_GE(probabilities[i], 0.4);
// Value 2.0 -> expect <= 0.2; value 3.0 -> expect >= 0.8.
176 EXPECT_LE(probabilities[4], 0.2);
177 EXPECT_GE(probabilities[5], 0.8);
178 EXPECT_LE(probabilities[6], 0.2);
179 EXPECT_GE(probabilities[7], 0.8);
// Applies two stored weightfiles (named v3 and v5) to the same small dataset
// and checks that each reproduces its reference probabilities and that both
// agree with each other.
// NOTE(review): this listing omits some original lines; the dataset
// construction, the loading of weightfile_v3 / weightfile_v5 and the expert
// declaration are not fully visible here.
183 TEST(FastBDTTest, WeightfilesOfDifferentVersionsAreConsistent)
// Three input variables.
188 general_options.m_variables = {
"M",
"p",
"pt"};
// Rows of three values each -- presumably one (M, p, pt) triple per event;
// six probabilities are checked below, so one row appears to be missing from
// this listing.
190 {1.873689, 1.881940, 1.843310},
191 {1.863657, 1.774831, 1.753773},
192 {1.858293, 1.605311, 0.631336},
193 {1.837129, 1.575739, 1.490166},
194 {1.811395, 1.524029, 0.565220}
// Presumably an empty spectator list followed by the six per-event targets
// -- confirm against the dataset constructor.
196 {}, {0.0, 1.0, 0.0, 1.0, 0.0, 1.0});
// v3 weightfile: each probability must match its reference within 1e-4.
201 expert->load(weightfile_v3);
202 auto probabilities_v3 = expert->apply(dataset);
203 EXPECT_NEAR(probabilities_v3[0], 0.0402499, 0.0001);
204 EXPECT_NEAR(probabilities_v3[1], 0.2189, 0.0001);
205 EXPECT_NEAR(probabilities_v3[2], 0.264094, 0.0001);
206 EXPECT_NEAR(probabilities_v3[3], 0.100049, 0.0001);
207 EXPECT_NEAR(probabilities_v3[4], 0.0664554, 0.0001);
208 EXPECT_NEAR(probabilities_v3[5], 0.00886221, 0.0001);
// v5 weightfile: same check against its own reference values.
211 expert->load(weightfile_v5);
212 auto probabilities_v5 = expert->apply(dataset);
213 EXPECT_NEAR(probabilities_v5[0], 0.0402498, 0.0001);
214 EXPECT_NEAR(probabilities_v5[1], 0.218899, 0.0001);
215 EXPECT_NEAR(probabilities_v5[2], 0.264093, 0.0001);
216 EXPECT_NEAR(probabilities_v5[3], 0.100048, 0.0001);
217 EXPECT_NEAR(probabilities_v5[4], 0.0664551, 0.0001);
218 EXPECT_NEAR(probabilities_v5[5], 0.00886217, 0.0001);
// Cross-version consistency: v5 and v3 outputs must agree within 1e-3.
224 EXPECT_NEAR(probabilities_v5[0], probabilities_v3[0], 0.001);
225 EXPECT_NEAR(probabilities_v5[1], probabilities_v3[1], 0.001);
226 EXPECT_NEAR(probabilities_v5[2], probabilities_v3[2], 0.001);
227 EXPECT_NEAR(probabilities_v5[3], probabilities_v3[3], 0.001);
228 EXPECT_NEAR(probabilities_v5[4], probabilities_v3[4], 0.001);
229 EXPECT_NEAR(probabilities_v5[5], probabilities_v3[5], 0.001);
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Abstract base class of all Datasets given to the MVA interface. The current event can always be access...
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
virtual unsigned int getNumberOfSpectators() const =0
Returns the number of spectators in this dataset.
virtual unsigned int getNumberOfFeatures() const =0
Returns the number of features in this dataset.
virtual void loadEvent(unsigned int iEvent)=0
Load the event number iEvent.
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
virtual float getSignalFraction()
Returns the signal fraction of the whole sample.
Options for the FastBDT MVA method.
std::vector< unsigned int > m_individual_nCuts
Number of cut Levels = log_2(Number of Cuts) for each provided feature.
bool m_sPlot
Activates sPlot sampling.
double m_randRatio
Fraction of data to use in the stochastic training.
double m_flatnessLoss
Flatness Loss constant.
double m_shrinkage
Shrinkage during the boosting step.
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from an XML tree.
bool m_purityTransformation
Activates purity transformation globally for all features.
unsigned int m_nLevels
Depth of tree.
std::vector< bool > m_individualPurityTransformation
Vector which decides, for each feature individually, whether the purity transformation should be used.
unsigned int m_nCuts
Number of cut Levels = log_2(Number of Cuts)
unsigned int m_nTrees
Number of trees.
General options which are shared by all MVA trainings.
Template class to easily construct an interface for an MVA library using a library-specific Options,...
virtual std::unique_ptr< MVA::Expert > getExpert() const override
Get Expert of this MVA library.
virtual std::unique_ptr< Teacher > getTeacher(const GeneralOptions &general_options, const SpecificOptions &specific_options) const override
Get Teacher of this MVA library.
Wraps the data of multiple events into a Dataset.
static Weightfile loadFromFile(const std::string &filename)
Static function which loads a Weightfile from a file.
Abstract base class for different kinds of events.