9#include <mva/methods/TMVA.h>
10#include <mva/interface/Interface.h>
11#include <framework/utilities/FileSystem.h>
12#include <framework/utilities/TestHelpers.h>
14#include <gtest/gtest.h>
// Test of the TMVA option classes: checks the expected member values, the
// save()/load() round trip through a boost property tree, the
// classification/regression specialisations and the failure handling of load().
// NOTE(review): the declarations of the option objects used below are not
// visible in this listing.
20 TEST(TMVATest, TMVAOptions)
// Expected member values before anything is assigned in this test
// (presumably the defaults of the options class - confirm against its header).
27 EXPECT_EQ(specific_options.m_method,
"BDT");
28 EXPECT_EQ(specific_options.m_type,
"BDT");
29 EXPECT_EQ(specific_options.m_config,
30 "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining");
31 EXPECT_EQ(specific_options.m_factoryOption,
"!V:!Silent:Color:DrawProgressBar");
32 EXPECT_EQ(specific_options.m_prepareOption,
"SplitMode=random:!V");
33 EXPECT_EQ(specific_options.m_workingDirectory,
"");
34 EXPECT_EQ(specific_options.m_prefix,
"TMVA");
// Overwrite every member with a distinct marker string so that the
// save/load checks below can tell the fields apart.
36 specific_options.m_method =
"Method";
37 specific_options.m_type =
"Type";
38 specific_options.m_config =
"Config";
39 specific_options.m_factoryOption =
"FactoryOption";
40 specific_options.m_prepareOption =
"PrepareOption";
41 specific_options.m_workingDirectory =
"WorkingDirectory";
42 specific_options.m_prefix =
"Prefix";
// save() is expected to store each member under a key with the TMVA_ prefix.
44 boost::property_tree::ptree pt;
45 specific_options.save(pt);
46 EXPECT_EQ(pt.get<std::string>(
"TMVA_method"),
"Method");
47 EXPECT_EQ(pt.get<std::string>(
"TMVA_type"),
"Type");
48 EXPECT_EQ(pt.get<std::string>(
"TMVA_config"),
"Config");
49 EXPECT_EQ(pt.get<std::string>(
"TMVA_factoryOption"),
"FactoryOption");
50 EXPECT_EQ(pt.get<std::string>(
"TMVA_prepareOption"),
"PrepareOption");
51 EXPECT_EQ(pt.get<std::string>(
"TMVA_workingDirectory"),
"WorkingDirectory");
52 EXPECT_EQ(pt.get<std::string>(
"TMVA_prefix"),
"Prefix");
// Loading the tree into a second options object must restore the saved values.
55 specific_options2.
load(pt);
57 EXPECT_EQ(specific_options2.
m_method,
"Method");
58 EXPECT_EQ(specific_options2.
m_type,
"Type");
59 EXPECT_EQ(specific_options2.
m_config,
"Config");
63 EXPECT_EQ(specific_options2.
m_prefix,
"Prefix");
// Classification options: the factory option string is expected to end with
// AnalysisType=Classification.
67 EXPECT_EQ(specific_classification_options.
m_factoryOption,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");
// The saved classification tree is expected to contain
// TMVA_transform2probability == false, and must be loadable again.
70 boost::property_tree::ptree pt_classification;
71 specific_classification_options.
save(pt_classification);
72 EXPECT_EQ(pt_classification.get<
bool>(
"TMVA_transform2probability"),
false);
75 specific_classification_options2.
load(pt_classification);
// Regression options: the factory option string is expected to end with
// AnalysisType=Regression.
79 EXPECT_EQ(specific_regression_options.
m_factoryOption,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression");
// Method names reported by the two specialisations.
81 EXPECT_EQ(specific_classification_options.
getMethod(), std::string(
"TMVAClassification"));
82 EXPECT_EQ(specific_regression_options.
getMethod(), std::string(
"TMVARegression"));
// Number of program options exposed by each description: 6 for the base and
// the regression options, 7 for the classification options.
85 auto description = specific_options.getDescription();
86 EXPECT_EQ(description.options().size(), 6);
88 auto description_reg = specific_regression_options.
getDescription();
89 EXPECT_EQ(description_reg.options().size(), 6);
91 auto description_cls = specific_classification_options.
getDescription();
92 EXPECT_EQ(description_cls.options().size(), 7);
// With TMVA_version set to 100 in the tree, load() is expected to emit a
// B2ERROR.
96 pt.put(
"TMVA_version", 100);
98 EXPECT_B2ERROR(specific_options2.
load(pt));
// load(pt) is expected to throw std::runtime_error at this point; any setup
// between the two checks is not visible in this listing.
102 EXPECT_THROW(specific_options2.
load(pt), std::runtime_error);
// Members of the classification test dataset, backed by a flat vector of
// feature values (one value per event).
// Constructor: copies the given values into m_data and initialises the
// MVA::Dataset base with default-constructed MVA::GeneralOptions.
// NOTE(review): the constructor body is not visible in this listing.
108 explicit TestClassificationDataset(
const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
// The number of events equals the number of stored feature values.
118 [[nodiscard]]
unsigned int getNumberOfEvents()
const override {
return m_data.size(); }
// Loads event iEvent: the single input is m_data[iEvent], the target
// alternates 0/1 with the event index, and the event counts as signal when
// the target is 1 (i.e. for odd event indices).
119 void loadEvent(
unsigned int iEvent)
override { m_input[0] = m_data[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
// Returns a copy of all feature values; the feature index argument is ignored.
121 std::vector<float>
getFeature(
unsigned int)
override {
return m_data; }
// Feature value of every event, indexed by event number.
123 std::vector<float> m_data;
// Trains a TMVA classification method on a one-variable test dataset via the
// teacher, loads the resulting weightfile into the expert and checks the
// expert output for every event.
// NOTE(review): the declarations of interface, the option objects and expert
// are not visible in this listing.
128 TEST(TMVATest, TMVAClassificationInterface)
// One feature named A and a target variable named Target.
134 general_options.m_variables = {
"A"};
135 general_options.m_target_variable =
"Target";
// Block split mode, no transformation of the output to a probability, and an
// explicit method configuration string.
137 specific_options.m_prepareOption =
"SplitMode=block:!V";
138 specific_options.transform2probability =
false;
139 specific_options.m_config =
140 "!H:!V:CreateMVAPdfs:NTrees=400:BoostType=Grad:Shrinkage=0.1:nCuts=10:MaxDepth=3:IgnoreNegWeightsInTraining:MinNodeSize=20";
// 96 events in four blocks of 24: constant 1.0, alternating 2.0/3.0,
// constant 1.0, alternating 2.0/3.0.
141 TestClassificationDataset dataset({1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
142 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
143 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
144 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
145 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
146 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
147 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
148 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0
// Train, load the produced weightfile into the expert and apply it to the
// same dataset; one output value per event is expected.
151 auto teacher = interface.
getTeacher(general_options, specific_options);
152 auto weightfile = teacher->train(dataset);
155 expert->load(weightfile);
156 auto probabilities = expert->apply(dataset);
157 EXPECT_EQ(probabilities.size(), dataset.getNumberOfEvents());
// Events 0-23 all have feature value 1.0: output must lie within [-0.1, 0.1].
158 for (
unsigned int i = 0; i < 24; ++i) {
159 EXPECT_LE(probabilities[i], 0.1);
160 EXPECT_GE(probabilities[i], -0.1);
// Events 24-47 alternate 2.0/3.0: even indices (2.0) must score <= -0.8,
// odd indices (3.0) must score >= 0.8.
162 for (
unsigned int i = 24; i < 48; i += 2) {
163 EXPECT_LE(probabilities[i], -0.8);
164 EXPECT_GE(probabilities[i + 1], 0.8);
// Events 48-71: same check as for the first constant block.
166 for (
unsigned int i = 48; i < 72; ++i) {
167 EXPECT_LE(probabilities[i], 0.1);
168 EXPECT_GE(probabilities[i], -0.1);
// Events 72-95: same check as for the first alternating block.
170 for (
unsigned int i = 72; i < 96; i += 2) {
171 EXPECT_LE(probabilities[i], -0.8);
172 EXPECT_GE(probabilities[i + 1], 0.8);
// Members of the regression test dataset, backed by a flat vector of feature
// values (one value per event).
// Constructor: copies the given values into m_data and initialises the
// MVA::Dataset base with default-constructed MVA::GeneralOptions.
// NOTE(review): the constructor body is not visible in this listing.
181 explicit TestRegressionDataset(
const std::vector<float>& data) : MVA::Dataset(MVA::GeneralOptions()), m_data(data)
// The number of events equals the number of stored feature values.
191 [[nodiscard]]
unsigned int getNumberOfEvents()
const override {
return m_data.size(); }
// Loads event iEvent: the single input is m_data[iEvent]; the target is
// derived from the event index modulo 48, shifted by -24, divided by 4 and
// then by 24.0. The division by 4 is an integer division (truncating toward
// zero) that happens before the cast to float.
192 void loadEvent(
unsigned int iEvent)
override { m_input[0] = m_data[iEvent]; m_target =
static_cast<float>((
static_cast<int>(iEvent % 48) - 24) / 4) / 24.0;};
// Returns a copy of all feature values; the feature index argument is ignored.
194 std::vector<float>
getFeature(
unsigned int)
override {
return m_data; }
// Feature value of every event, indexed by event number.
196 std::vector<float> m_data;
// Trains a TMVA regression method on a one-variable test dataset via the
// teacher, loads the resulting weightfile into the expert and checks that the
// predicted values stay within a tolerance of the reference value.
// NOTE(review): the declarations of interface, the option objects and expert
// are not visible in this listing.
200 TEST(TMVATest, TMVARegressionInterface)
// One feature named A and a target variable named Target.
205 general_options.m_variables = {
"A"};
206 general_options.m_target_variable =
"Target";
208 specific_options.m_prepareOption =
"SplitMode=block:!V";
// NOTE(review): the config string below contains a double colon after
// NTrees=200 - confirm this is intentional and accepted by TMVA.
209 specific_options.m_config =
"!H:!V:NTrees=200::BoostType=Grad:Shrinkage=0.1:nCuts=24:MaxDepth=3";
// 96 events: the values 1.0 to 12.0, each repeated four times, and that
// sequence of 48 values given twice.
211 TestRegressionDataset dataset({1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
212 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
213 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
214 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0,
215 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
216 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0,
217 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
218 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0
// Train, load the produced weightfile into the expert and apply it to the
// same dataset; one output value per event is expected.
221 auto teacher = interface.
getTeacher(general_options, specific_options);
222 auto weightfile = teacher->train(dataset);
225 expert->load(weightfile);
226 auto values = expert->apply(dataset);
227 EXPECT_EQ(values.size(), dataset.getNumberOfEvents());
// Walk the events in groups of four (one group per repeated feature value).
// r is the reference value computed from the first index of the group with
// the same expression that TestRegressionDataset::loadEvent uses for the
// target; all four outputs of the group must lie within r +- 0.05.
228 for (
unsigned int i = 0; i < 96; i += 4) {
229 float r =
static_cast<float>((
static_cast<int>(i % 48) - 24) / 4) / 24.0;
230 EXPECT_LE(values[i], r + 0.05);
231 EXPECT_GE(values[i], r - 0.05);
232 EXPECT_LE(values[i + 1], r + 0.05);
233 EXPECT_GE(values[i + 1], r - 0.05);
234 EXPECT_LE(values[i + 2], r + 0.05);
235 EXPECT_GE(values[i + 2], r - 0.05);
236 EXPECT_LE(values[i + 3], r + 0.05);
237 EXPECT_GE(values[i + 3], r - 0.05);
// Loads a weightfile into the expert, applies it to a small fixed dataset and
// compares the outputs with stored reference values.
// NOTE(review): the lines that obtain the weightfile and construct the
// dataset and expert are not visible in this listing.
242 TEST(TMVATest, WeightfilesAreReadCorrectly)
// Three input variables.
248 general_options.m_variables = {
"M",
"p",
"pt"};
// Rows of three values each - presumably one (M, p, pt) triple per event;
// confirm against the full source. Only five rows are visible here although
// six outputs are checked below.
250 {1.873689, 1.881940, 1.843310},
251 {1.863657, 1.774831, 1.753773},
252 {1.858293, 1.605311, 0.631336},
253 {1.837129, 1.575739, 1.490166},
254 {1.811395, 1.524029, 0.565220}
256 {}, {0.0, 1.0, 0.0, 1.0, 0.0, 1.0});
// Each output must match its reference value within an absolute tolerance
// of 0.0001.
261 expert->load(weightfile);
262 auto probabilities = expert->apply(dataset);
263 EXPECT_NEAR(probabilities[0], 0.098980136215686798, 0.0001);
264 EXPECT_NEAR(probabilities[1], 0.35516414046287537, 0.0001);
265 EXPECT_NEAR(probabilities[2], 0.066082566976547241, 0.0001);
266 EXPECT_NEAR(probabilities[3], 0.18826344609260559, 0.0001);
267 EXPECT_NEAR(probabilities[4], 0.10691597312688828, 0.0001);
268 EXPECT_NEAR(probabilities[5], 1.4245844629813542e-13, 0.0001);
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Abstract base class of all Datasets given to the MVA interface. The current event can always be access...
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
virtual unsigned int getNumberOfSpectators() const =0
Returns the number of spectators in this dataset.
virtual unsigned int getNumberOfFeatures() const =0
Returns the number of features in this dataset.
virtual void loadEvent(unsigned int iEvent)=0
Load the event number iEvent.
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
virtual float getSignalFraction()
Returns the signal fraction of the whole sample.
General options which are shared by all MVA trainings.
Template class to easily construct an interface for an MVA library using a library-specific Options,...
virtual std::unique_ptr< MVA::Expert > getExpert() const override
Get Expert of this MVA library.
virtual std::unique_ptr< Teacher > getTeacher(const GeneralOptions &general_options, const SpecificOptions &specific_options) const override
Get Teacher of this MVA library.
Wraps the data of multiple events into a Dataset.
Options for the TMVA Classification MVA method.
virtual std::string getMethod() const override
Return method name.
virtual po::options_description getDescription() override
Returns a program options description for all available options.
bool transform2probability
Transform output of method to a probability.
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from an XML tree.
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism to store Options in an XML tree.
Options for the TMVA Regression MVA method.
virtual std::string getMethod() const override
Return method name.
Options for the TMVA MVA method.
std::string m_prepareOption
Prepare options passed to prepareTrainingAndTestTree method.
std::string m_prefix
Prefix used for all files generated by TMVA.
std::string m_config
TMVA config string for the chosen method.
std::string m_method
tmva method name
virtual po::options_description getDescription() override
Returns a program options description for all available options.
std::string m_factoryOption
Factory options passed to tmva factory.
std::string m_type
tmva method type
std::string m_workingDirectory
Working directory of TMVA, if empty a temporary directory is used.
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from an XML tree.
static Weightfile loadFromFile(const std::string &filename)
Static function which loads a Weightfile from a file.
Abstract base class for different kinds of events.