Belle II Software  release-08-01-10
Options.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <mva/interface/Options.h>
10 #include <boost/property_tree/ptree.hpp>
11 
12 namespace Belle2 {
17  namespace MVA {
18 
19  po::options_description GeneralOptions::getDescription()
20  {
21  po::options_description description("General options");
22  description.add_options()
23  ("help", "print this message")
24  ("datafiles", po::value<std::vector<std::string>>(&m_datafiles)->required()->multitoken(),
25  "ROOT files containing the training dataset")
26  ("treename", po::value<std::string>(&m_treename), "Name of tree in ROOT datafile")
27  ("identifier", po::value<std::string>(&m_identifier)->required(), "Identifier of the outputted weightfile")
28  ("variables", po::value<std::vector<std::string>>(&m_variables)->required()->multitoken(),
29  "feature variables used in the training")
30  ("spectators", po::value<std::vector<std::string>>(&m_spectators)->multitoken(),
31  "spectator variables used in the training")
32  ("target_variable", po::value<std::string>(&m_target_variable),
33  "target variable used to distinguish between signal and background, isSignal is used as default.")
34  ("signal_class", po::value<int>(&m_signal_class), "integer which identifies signal events")
35  ("nClasses", po::value<unsigned int>(&m_nClasses),
36  "number of classes under consideration. Must be supplied for multiclass classifications. Not all methods support multiclass classification.")
37  ("weight_variable", po::value<std::string>(&m_weight_variable), "weight variable used to weight each event")
38  ("max_events", po::value<unsigned int>(&m_max_events), "maximum number of events to process, 0 means all")
39  ("method", po::value<std::string>(&m_method)->required(),
40  "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
41  return description;
42  }
43 
44  void GeneralOptions::load(const boost::property_tree::ptree& pt)
45  {
46  m_method = pt.get<std::string>("method");
47  m_identifier = pt.get<std::string>("weightfile");
48  m_treename = pt.get<std::string>("treename");
49  m_target_variable = pt.get<std::string>("target_variable");
50  m_weight_variable = pt.get<std::string>("weight_variable");
51  m_signal_class = pt.get<int>("signal_class");
52  m_max_events = pt.get<unsigned int>("max_events", 0u);
53  m_nClasses = pt.get<unsigned int>("nClasses", 2u);
54 
55  unsigned int numberOfFiles = pt.get<unsigned int>("number_data_files", 0);
56  m_datafiles.resize(numberOfFiles);
57  for (unsigned int i = 0; i < numberOfFiles; ++i) {
58  m_datafiles[i] = pt.get<std::string>(std::string("datafile") + std::to_string(i));
59  }
60 
61  unsigned int numberOfSpectators = pt.get<unsigned int>("number_spectator_variables", 0u);
62  m_spectators.resize(numberOfSpectators);
63  for (unsigned int i = 0; i < numberOfSpectators; ++i) {
64  m_spectators[i] = pt.get<std::string>(std::string("spectator") + std::to_string(i));
65  }
66 
67  auto numberOfFeatures = pt.get<unsigned int>("number_feature_variables");
68  m_variables.resize(numberOfFeatures);
69  for (unsigned int i = 0; i < numberOfFeatures; ++i) {
70  m_variables[i] = pt.get<std::string>(std::string("variable") + std::to_string(i));
71  }
72  }
73 
74  void GeneralOptions::save(boost::property_tree::ptree& pt) const
75  {
76  pt.put("method", m_method);
77  pt.put("weightfile", m_identifier);
78  pt.put("treename", m_treename);
79  pt.put("target_variable", m_target_variable);
80  pt.put("weight_variable", m_weight_variable);
81  pt.put("signal_class", m_signal_class);
82  pt.put("max_events", m_max_events);
83  pt.put("nClasses", m_nClasses);
84 
85  pt.put("number_feature_variables", m_variables.size());
86  for (unsigned int i = 0; i < m_variables.size(); ++i) {
87  pt.put(std::string("variable") + std::to_string(i), m_variables[i]);
88  }
89 
90  pt.put("number_spectator_variables", m_spectators.size());
91  for (unsigned int i = 0; i < m_spectators.size(); ++i) {
92  pt.put(std::string("spectator") + std::to_string(i), m_spectators[i]);
93  }
94 
95  pt.put("number_data_files", m_datafiles.size());
96  for (unsigned int i = 0; i < m_datafiles.size(); ++i) {
97  pt.put(std::string("datafile") + std::to_string(i), m_datafiles[i]);
98  }
99  }
100 
101  po::options_description MetaOptions::getDescription()
102  {
103  po::options_description description("Meta options");
104  description.add_options()
105  ("use_splot", po::value<bool>(&m_use_splot), "whether to do an splot training")
106  ("splot_variable", po::value<std::string>(&m_splot_variable), "Variable used as discriminating variable in sPlot training")
107  ("splot_mc_files", po::value<std::vector<std::string>>(&m_splot_mc_files)->multitoken(),
108  "Monte carlo files containing the discriminant variable with the mc truth")
109  ("splot_combined", po::value<bool>(&m_splot_combined), "Combine sPlot training with PDF classifier for discriminating variable")
110  ("splot_boosted", po::value<bool>(&m_splot_boosted), "Use boosted sPlot training (aPlot)")
111  ("use_sideband_subtraction", po::value<bool>(&m_use_sideband_subtraction), "whether to do a sideband subtraction training")
112  ("sideband_mc_files", po::value<std::vector<std::string>>(&m_sideband_mc_files)->multitoken(),
113  "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
114  ("sideband_variable", po::value<std::string>(&m_sideband_variable),
115  "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction")
116  ("use_reweighting", po::value<bool>(&m_use_reweighting), "whether to do a reweighting pre training")
117  ("reweighting_variable", po::value<std::string>(&m_reweighting_variable),
118  "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
119  ("reweighting_identifier", po::value<std::string>(&m_reweighting_identifier),
120  "Identifier used to save the reweighting expert.")
121  ("reweighting_mc_files", po::value<std::vector<std::string>>(&m_reweighting_mc_files)->multitoken(),
122  "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
123  ("reweighting_data_files", po::value<std::vector<std::string>>(&m_reweighting_data_files)->multitoken(),
124  "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
125  return description;
126  }
127 
128  void MetaOptions::load(const boost::property_tree::ptree& pt)
129  {
130  m_use_splot = pt.get<bool>("use_splot", false);
131  m_splot_combined = pt.get<bool>("splot_combined", false);
132  m_splot_boosted = pt.get<bool>("splot_boosted", false);
133  m_splot_variable = pt.get<std::string>("splot_variable", "");
134 
135  unsigned int splot_number_of_mc_files = pt.get<unsigned int>("splot_number_of_mc_files", 0);
136  m_splot_mc_files.resize(splot_number_of_mc_files);
137  for (unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
138  m_splot_mc_files[i] = pt.get<std::string>(std::string("splot_mc_file") + std::to_string(i));
139  }
140 
141  m_use_sideband_subtraction = pt.get<bool>("use_sideband_subtraction");
142  m_sideband_variable = pt.get<std::string>("sideband_variable");
143 
144  unsigned int sideband_number_of_mc_files = pt.get<unsigned int>("sideband_number_of_mc_files", 0);
145  m_sideband_mc_files.resize(sideband_number_of_mc_files);
146  for (unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
147  m_sideband_mc_files[i] = pt.get<std::string>(std::string("sideband_mc_file") + std::to_string(i));
148  }
149 
150  m_use_reweighting = pt.get<bool>("use_reweighting", false);
151  m_reweighting_variable = pt.get<std::string>("reweighting_variable");
152  m_reweighting_identifier = pt.get<std::string>("reweighting_identifier");
153 
154  unsigned int reweighting_number_of_mc_files = pt.get<unsigned int>("reweighting_number_of_mc_files", 0);
155  m_reweighting_mc_files.resize(reweighting_number_of_mc_files);
156  for (unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
157  m_reweighting_mc_files[i] = pt.get<std::string>(std::string("reweighting_mc_file") + std::to_string(i));
158  }
159 
160  unsigned int reweighting_number_of_data_files = pt.get<unsigned int>("reweighting_number_of_data_files", 0);
161  m_reweighting_data_files.resize(reweighting_number_of_data_files);
162  for (unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
163  m_reweighting_data_files[i] = pt.get<std::string>(std::string("reweighting_data_file") + std::to_string(i));
164  }
165 
166  }
167 
168  void MetaOptions::save(boost::property_tree::ptree& pt) const
169  {
170  pt.put("use_splot", m_use_splot);
171  pt.put("splot_variable", m_splot_variable);
172  pt.put("splot_combined", m_splot_combined);
173  pt.put("splot_boosted", m_splot_boosted);
174 
175  pt.put("splot_number_of_mc_files", m_splot_mc_files.size());
176  for (unsigned int i = 0; i < m_splot_mc_files.size(); ++i) {
177  pt.put(std::string("splot_mc_file") + std::to_string(i), m_splot_mc_files[i]);
178  }
179 
180  pt.put("use_sideband_subtraction", m_use_sideband_subtraction);
181  pt.put("sideband_variable", m_sideband_variable);
182 
183  pt.put("sideband_number_of_mc_files", m_sideband_mc_files.size());
184  for (unsigned int i = 0; i < m_sideband_mc_files.size(); ++i) {
185  pt.put(std::string("sideband_mc_file") + std::to_string(i), m_sideband_mc_files[i]);
186  }
187 
188  pt.put("use_reweighting", m_use_reweighting);
189  pt.put("reweighting_variable", m_reweighting_variable);
190  pt.put("reweighting_identifier", m_reweighting_identifier);
191 
192  pt.put("reweighting_number_of_mc_files", m_reweighting_mc_files.size());
193  for (unsigned int i = 0; i < m_reweighting_mc_files.size(); ++i) {
194  pt.put(std::string("reweighting_mc_file") + std::to_string(i), m_reweighting_mc_files[i]);
195  }
196 
197  pt.put("reweighting_number_of_data_files", m_reweighting_data_files.size());
198  for (unsigned int i = 0; i < m_reweighting_data_files.size(); ++i) {
199  pt.put(std::string("reweighting_data_file") + std::to_string(i), m_reweighting_data_files[i]);
200  }
201 
202  }
203 
204  }
206 }
std::vector< std::string > m_datafiles
Name of the datafiles containing the training data.
Definition: Options.h:84
int m_signal_class
Signal class which is used as signal in a classification problem.
Definition: Options.h:88
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
Definition: Options.h:86
std::string m_weight_variable
Weight variable (branch name) defining the weights.
Definition: Options.h:91
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
Definition: Options.h:87
std::string m_method
Name of the MVA method to use.
Definition: Options.h:82
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Definition: Options.h:92
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:19
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:44
std::string m_treename
Name of the TTree inside the datafile containing the training data.
Definition: Options.h:85
std::string m_target_variable
Target variable (branch name) defining the target.
Definition: Options.h:90
unsigned int m_nClasses
Number of classes in a classification problem.
Definition: Options.h:89
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:74
std::string m_identifier
Identifier containing the finished training.
Definition: Options.h:83
std::string m_reweighting_variable
Variable defining for which events the reweighting should be used (1) or not used (0).
Definition: Options.h:144
bool m_use_reweighting
Use a pretraining of data against mc and weight the mc afterwards.
Definition: Options.h:142
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:101
bool m_use_splot
Use splot training.
Definition: Options.h:131
std::string m_splot_variable
Discriminating variable.
Definition: Options.h:132
std::vector< std::string > m_reweighting_mc_files
MC files for the pretraining.
Definition: Options.h:147
std::vector< std::string > m_reweighting_data_files
Data files for the pretraining.
Definition: Options.h:146
bool m_splot_combined
Combine sPlot training with PDF classifier for discriminating variable.
Definition: Options.h:134
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:128
std::string m_reweighting_identifier
Identifier used to save the reweighting expert.
Definition: Options.h:143
bool m_splot_boosted
Use boosted sPlot training (aPlot)
Definition: Options.h:135
std::vector< std::string > m_splot_mc_files
Monte carlo files used for the distribution of the discriminating variable.
Definition: Options.h:133
std::string m_sideband_variable
Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction.
Definition: Options.h:139
std::vector< std::string > m_sideband_mc_files
Monte carlo files used to estimate the number of events in the different regions.
Definition: Options.h:138
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:168
bool m_use_sideband_subtraction
Use sideband subtraction.
Definition: Options.h:137
Abstract base class for different kinds of events.