Belle II Software development
Options.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <mva/interface/Options.h>
10#include <boost/property_tree/ptree.hpp>
11
12namespace Belle2 {
17 namespace MVA {
18
19 po::options_description GeneralOptions::getDescription()
20 {
21 po::options_description description("General options");
22 description.add_options()
23 ("help", "print this message")
24 ("datafiles", po::value<std::vector<std::string>>(&m_datafiles)->required()->multitoken(),
25 "ROOT files containing the training dataset")
26 ("treename", po::value<std::string>(&m_treename), "Name of tree in ROOT datafile")
27 ("identifier", po::value<std::string>(&m_identifier)->required(), "Identifier of the outputted weightfile")
28 ("variables", po::value<std::vector<std::string>>(&m_variables)->required()->multitoken(),
29 "feature variables used in the training")
30 ("spectators", po::value<std::vector<std::string>>(&m_spectators)->multitoken(),
31 "spectator variables used in the training")
32 ("target_variable", po::value<std::string>(&m_target_variable),
33 "target variable used to distinguish between signal and background, isSignal is used as default.")
34 ("signal_class", po::value<int>(&m_signal_class), "integer which identifies signal events")
35 ("nClasses", po::value<unsigned int>(&m_nClasses),
36 "number of classes under consideration. Must be supplied for multiclass classifications. Not all methods support multiclass classification.")
37 ("weight_variable", po::value<std::string>(&m_weight_variable), "weight variable used to weight each event")
38 ("max_events", po::value<unsigned int>(&m_max_events), "maximum number of events to process, 0 means all")
39 ("method", po::value<std::string>(&m_method)->required(),
40 "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
41 return description;
42 }
43
44 void GeneralOptions::load(const boost::property_tree::ptree& pt)
45 {
46 m_method = pt.get<std::string>("method");
47 m_identifier = pt.get<std::string>("weightfile");
48 m_treename = pt.get<std::string>("treename");
49 m_target_variable = pt.get<std::string>("target_variable");
50 m_weight_variable = pt.get<std::string>("weight_variable");
51 m_signal_class = pt.get<int>("signal_class");
52 m_max_events = pt.get<unsigned int>("max_events", 0u);
53 m_nClasses = pt.get<unsigned int>("nClasses", 2u);
54
55 unsigned int numberOfFiles = pt.get<unsigned int>("number_data_files", 0);
56 m_datafiles.resize(numberOfFiles);
57 for (unsigned int i = 0; i < numberOfFiles; ++i) {
58 m_datafiles[i] = pt.get<std::string>(std::string("datafile") + std::to_string(i));
59 }
60
61 unsigned int numberOfSpectators = pt.get<unsigned int>("number_spectator_variables", 0u);
62 m_spectators.resize(numberOfSpectators);
63 for (unsigned int i = 0; i < numberOfSpectators; ++i) {
64 m_spectators[i] = pt.get<std::string>(std::string("spectator") + std::to_string(i));
65 }
66
67 auto numberOfFeatures = pt.get<unsigned int>("number_feature_variables");
68 m_variables.resize(numberOfFeatures);
69 for (unsigned int i = 0; i < numberOfFeatures; ++i) {
70 m_variables[i] = pt.get<std::string>(std::string("variable") + std::to_string(i));
71 }
72 }
73
74 void GeneralOptions::save(boost::property_tree::ptree& pt) const
75 {
76 pt.put("method", m_method);
77 pt.put("weightfile", m_identifier);
78 pt.put("treename", m_treename);
79 pt.put("target_variable", m_target_variable);
80 pt.put("weight_variable", m_weight_variable);
81 pt.put("signal_class", m_signal_class);
82 pt.put("max_events", m_max_events);
83 pt.put("nClasses", m_nClasses);
84
85 pt.put("number_feature_variables", m_variables.size());
86 for (unsigned int i = 0; i < m_variables.size(); ++i) {
87 pt.put(std::string("variable") + std::to_string(i), m_variables[i]);
88 }
89
90 pt.put("number_spectator_variables", m_spectators.size());
91 for (unsigned int i = 0; i < m_spectators.size(); ++i) {
92 pt.put(std::string("spectator") + std::to_string(i), m_spectators[i]);
93 }
94
95 pt.put("number_data_files", m_datafiles.size());
96 for (unsigned int i = 0; i < m_datafiles.size(); ++i) {
97 pt.put(std::string("datafile") + std::to_string(i), m_datafiles[i]);
98 }
99 }
100
101 po::options_description MetaOptions::getDescription()
102 {
103 po::options_description description("Meta options");
104 description.add_options()
105 ("use_splot", po::value<bool>(&m_use_splot), "whether to do an splot training")
106 ("splot_variable", po::value<std::string>(&m_splot_variable), "Variable used as discriminating variable in sPlot training")
107 ("splot_mc_files", po::value<std::vector<std::string>>(&m_splot_mc_files)->multitoken(),
108 "Monte carlo files containing the discriminant variable with the mc truth")
109 ("splot_combined", po::value<bool>(&m_splot_combined), "Combine sPlot training with PDF classifier for discriminating variable")
110 ("splot_boosted", po::value<bool>(&m_splot_boosted), "Use boosted sPlot training (aPlot)")
111 ("use_sideband_subtraction", po::value<bool>(&m_use_sideband_subtraction), "whether to do a sideband subtraction training")
112 ("sideband_mc_files", po::value<std::vector<std::string>>(&m_sideband_mc_files)->multitoken(),
113 "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
114 ("sideband_variable", po::value<std::string>(&m_sideband_variable),
115 "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction")
116 ("use_reweighting", po::value<bool>(&m_use_reweighting), "whether to do a reweighting pre training")
117 ("reweighting_variable", po::value<std::string>(&m_reweighting_variable),
118 "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
119 ("reweighting_identifier", po::value<std::string>(&m_reweighting_identifier),
120 "Identifier used to save the reweighting expert.")
121 ("reweighting_mc_files", po::value<std::vector<std::string>>(&m_reweighting_mc_files)->multitoken(),
122 "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
123 ("reweighting_data_files", po::value<std::vector<std::string>>(&m_reweighting_data_files)->multitoken(),
124 "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
125 return description;
126 }
127
128 void MetaOptions::load(const boost::property_tree::ptree& pt)
129 {
130 m_use_splot = pt.get<bool>("use_splot", false);
131 m_splot_combined = pt.get<bool>("splot_combined", false);
132 m_splot_boosted = pt.get<bool>("splot_boosted", false);
133 m_splot_variable = pt.get<std::string>("splot_variable", "");
134
135 unsigned int splot_number_of_mc_files = pt.get<unsigned int>("splot_number_of_mc_files", 0);
136 m_splot_mc_files.resize(splot_number_of_mc_files);
137 for (unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
138 m_splot_mc_files[i] = pt.get<std::string>(std::string("splot_mc_file") + std::to_string(i));
139 }
140
141 m_use_sideband_subtraction = pt.get<bool>("use_sideband_subtraction");
142 m_sideband_variable = pt.get<std::string>("sideband_variable");
143
144 unsigned int sideband_number_of_mc_files = pt.get<unsigned int>("sideband_number_of_mc_files", 0);
145 m_sideband_mc_files.resize(sideband_number_of_mc_files);
146 for (unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
147 m_sideband_mc_files[i] = pt.get<std::string>(std::string("sideband_mc_file") + std::to_string(i));
148 }
149
150 m_use_reweighting = pt.get<bool>("use_reweighting", false);
151 m_reweighting_variable = pt.get<std::string>("reweighting_variable");
152 m_reweighting_identifier = pt.get<std::string>("reweighting_identifier");
153
154 unsigned int reweighting_number_of_mc_files = pt.get<unsigned int>("reweighting_number_of_mc_files", 0);
155 m_reweighting_mc_files.resize(reweighting_number_of_mc_files);
156 for (unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
157 m_reweighting_mc_files[i] = pt.get<std::string>(std::string("reweighting_mc_file") + std::to_string(i));
158 }
159
160 unsigned int reweighting_number_of_data_files = pt.get<unsigned int>("reweighting_number_of_data_files", 0);
161 m_reweighting_data_files.resize(reweighting_number_of_data_files);
162 for (unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
163 m_reweighting_data_files[i] = pt.get<std::string>(std::string("reweighting_data_file") + std::to_string(i));
164 }
165
166 }
167
168 void MetaOptions::save(boost::property_tree::ptree& pt) const
169 {
170 pt.put("use_splot", m_use_splot);
171 pt.put("splot_variable", m_splot_variable);
172 pt.put("splot_combined", m_splot_combined);
173 pt.put("splot_boosted", m_splot_boosted);
174
175 pt.put("splot_number_of_mc_files", m_splot_mc_files.size());
176 for (unsigned int i = 0; i < m_splot_mc_files.size(); ++i) {
177 pt.put(std::string("splot_mc_file") + std::to_string(i), m_splot_mc_files[i]);
178 }
179
180 pt.put("use_sideband_subtraction", m_use_sideband_subtraction);
181 pt.put("sideband_variable", m_sideband_variable);
182
183 pt.put("sideband_number_of_mc_files", m_sideband_mc_files.size());
184 for (unsigned int i = 0; i < m_sideband_mc_files.size(); ++i) {
185 pt.put(std::string("sideband_mc_file") + std::to_string(i), m_sideband_mc_files[i]);
186 }
187
188 pt.put("use_reweighting", m_use_reweighting);
189 pt.put("reweighting_variable", m_reweighting_variable);
190 pt.put("reweighting_identifier", m_reweighting_identifier);
191
192 pt.put("reweighting_number_of_mc_files", m_reweighting_mc_files.size());
193 for (unsigned int i = 0; i < m_reweighting_mc_files.size(); ++i) {
194 pt.put(std::string("reweighting_mc_file") + std::to_string(i), m_reweighting_mc_files[i]);
195 }
196
197 pt.put("reweighting_number_of_data_files", m_reweighting_data_files.size());
198 for (unsigned int i = 0; i < m_reweighting_data_files.size(); ++i) {
199 pt.put(std::string("reweighting_data_file") + std::to_string(i), m_reweighting_data_files[i]);
200 }
201
202 }
203
204 }
206}
std::vector< std::string > m_datafiles
Name of the datafiles containing the training data.
Definition: Options.h:84
int m_signal_class
Signal class which is used as signal in a classification problem.
Definition: Options.h:88
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
Definition: Options.h:86
std::string m_weight_variable
Weight variable (branch name) defining the weights.
Definition: Options.h:91
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
Definition: Options.h:87
std::string m_method
Name of the MVA method to use.
Definition: Options.h:82
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Definition: Options.h:92
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:19
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:44
std::string m_treename
Name of the TTree inside the datafile containing the training data.
Definition: Options.h:85
std::string m_target_variable
Target variable (branch name) defining the target.
Definition: Options.h:90
unsigned int m_nClasses
Number of classes in a classification problem.
Definition: Options.h:89
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:74
std::string m_identifier
Identifier containing the finished training.
Definition: Options.h:83
std::string m_reweighting_variable
Variable defining for which events the reweighting should be used (1) or not used (0).
Definition: Options.h:144
bool m_use_reweighting
Use a pretraining of data against mc and weight the mc afterwards.
Definition: Options.h:142
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:101
bool m_use_splot
Use splot training.
Definition: Options.h:131
std::string m_splot_variable
Discriminating variable.
Definition: Options.h:132
std::vector< std::string > m_reweighting_mc_files
MC files for the pretraining.
Definition: Options.h:147
std::vector< std::string > m_reweighting_data_files
Data files for the pretraining.
Definition: Options.h:146
bool m_splot_combined
Combine sPlot training with PDF classifier for discriminating variable.
Definition: Options.h:134
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:128
std::string m_reweighting_identifier
Identifier used to save the reweighting expert.
Definition: Options.h:143
bool m_splot_boosted
Use boosted sPlot training (aPlot)
Definition: Options.h:135
std::vector< std::string > m_splot_mc_files
Monte carlo files used for the distribution of the discriminating variable.
Definition: Options.h:133
std::string m_sideband_variable
Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction.
Definition: Options.h:139
std::vector< std::string > m_sideband_mc_files
Monte carlo files used to estimate the number of events in the different regions.
Definition: Options.h:138
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:168
bool m_use_sideband_subtraction
Use sideband subtraction.
Definition: Options.h:137
Abstract base class for different kinds of events.