Belle II Software  release-06-02-00
Options.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <mva/interface/Options.h>
10 #include <boost/property_tree/ptree.hpp>
11 
12 namespace Belle2 {
17  namespace MVA {
18 
19  po::options_description GeneralOptions::getDescription()
20  {
21  po::options_description description("General options");
22  description.add_options()
23  ("help", "print this message")
24  ("datafiles", po::value<std::vector<std::string>>(&m_datafiles)->required()->multitoken(),
25  "ROOT files containing the training dataset")
26  ("treename", po::value<std::string>(&m_treename), "Name of tree in ROOT datafile")
27  ("identifier", po::value<std::string>(&m_identifier)->required(), "Identifier of the outputted weightfile")
28  ("variables", po::value<std::vector<std::string>>(&m_variables)->required()->multitoken(),
29  "feature variables used in the training")
30  ("spectators", po::value<std::vector<std::string>>(&m_spectators)->multitoken(),
31  "spectator variables used in the training")
32  ("target_variable", po::value<std::string>(&m_target_variable),
33  "target variable used to distinguish between signal and background, isSignal is used as default.")
34  ("signal_class", po::value<int>(&m_signal_class), "integer which identifies signal events")
35  ("weight_variable", po::value<std::string>(&m_weight_variable), "weight variable used to weight each event")
36  ("max_events", po::value<unsigned int>(&m_max_events), "maximum number of events to process, 0 means all")
37  ("method", po::value<std::string>(&m_method)->required(),
38  "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
39  return description;
40  }
41 
42  void GeneralOptions::load(const boost::property_tree::ptree& pt)
43  {
44  m_method = pt.get<std::string>("method");
45  m_identifier = pt.get<std::string>("weightfile");
46  m_treename = pt.get<std::string>("treename");
47  m_target_variable = pt.get<std::string>("target_variable");
48  m_weight_variable = pt.get<std::string>("weight_variable");
49  m_signal_class = pt.get<int>("signal_class");
50  m_max_events = pt.get<unsigned int>("max_events", 0u);
51 
52  unsigned int numberOfFiles = pt.get<unsigned int>("number_data_files", 0);
53  m_datafiles.resize(numberOfFiles);
54  for (unsigned int i = 0; i < numberOfFiles; ++i) {
55  m_datafiles[i] = pt.get<std::string>(std::string("datafile") + std::to_string(i));
56  }
57 
58  unsigned int numberOfSpectators = pt.get<unsigned int>("number_spectator_variables", 0u);
59  m_spectators.resize(numberOfSpectators);
60  for (unsigned int i = 0; i < numberOfSpectators; ++i) {
61  m_spectators[i] = pt.get<std::string>(std::string("spectator") + std::to_string(i));
62  }
63 
64  auto numberOfFeatures = pt.get<unsigned int>("number_feature_variables");
65  m_variables.resize(numberOfFeatures);
66  for (unsigned int i = 0; i < numberOfFeatures; ++i) {
67  m_variables[i] = pt.get<std::string>(std::string("variable") + std::to_string(i));
68  }
69  }
70 
71  void GeneralOptions::save(boost::property_tree::ptree& pt) const
72  {
73  pt.put("method", m_method);
74  pt.put("weightfile", m_identifier);
75  pt.put("treename", m_treename);
76  pt.put("target_variable", m_target_variable);
77  pt.put("weight_variable", m_weight_variable);
78  pt.put("signal_class", m_signal_class);
79  pt.put("max_events", m_max_events);
80 
81  pt.put("number_feature_variables", m_variables.size());
82  for (unsigned int i = 0; i < m_variables.size(); ++i) {
83  pt.put(std::string("variable") + std::to_string(i), m_variables[i]);
84  }
85 
86  pt.put("number_spectator_variables", m_spectators.size());
87  for (unsigned int i = 0; i < m_spectators.size(); ++i) {
88  pt.put(std::string("spectator") + std::to_string(i), m_spectators[i]);
89  }
90 
91  pt.put("number_data_files", m_datafiles.size());
92  for (unsigned int i = 0; i < m_datafiles.size(); ++i) {
93  pt.put(std::string("datafile") + std::to_string(i), m_datafiles[i]);
94  }
95  }
96 
97  po::options_description MetaOptions::getDescription()
98  {
99  po::options_description description("Meta options");
100  description.add_options()
101  ("use_splot", po::value<bool>(&m_use_splot), "whether to do an splot training")
102  ("splot_variable", po::value<std::string>(&m_splot_variable), "Variable used as discriminating variable in sPlot training")
103  ("splot_mc_files", po::value<std::vector<std::string>>(&m_splot_mc_files)->multitoken(),
104  "Monte carlo files containing the discriminant variable with the mc truth")
105  ("splot_combined", po::value<bool>(&m_splot_combined), "Combine sPlot training with PDF classifier for discriminating variable")
106  ("splot_boosted", po::value<bool>(&m_splot_boosted), "Use boosted sPlot training (aPlot)")
107  ("use_sideband_subtraction", po::value<bool>(&m_use_sideband_subtraction), "whether to do a sideband subtraction training")
108  ("sideband_mc_files", po::value<std::vector<std::string>>(&m_sideband_mc_files)->multitoken(),
109  "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
110  ("sideband_variable", po::value<std::string>(&m_sideband_variable),
111  "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction")
112  ("use_reweighting", po::value<bool>(&m_use_reweighting), "whether to do a reweighting pre training")
113  ("reweighting_variable", po::value<std::string>(&m_reweighting_variable),
114  "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
115  ("reweighting_identifier", po::value<std::string>(&m_reweighting_identifier),
116  "Identifier used to save the reweighting expert.")
117  ("reweighting_mc_files", po::value<std::vector<std::string>>(&m_reweighting_mc_files)->multitoken(),
118  "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
119  ("reweighting_data_files", po::value<std::vector<std::string>>(&m_reweighting_data_files)->multitoken(),
120  "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
121  return description;
122  }
123 
124  void MetaOptions::load(const boost::property_tree::ptree& pt)
125  {
126  m_use_splot = pt.get<bool>("use_splot", false);
127  m_splot_combined = pt.get<bool>("splot_combined", false);
128  m_splot_boosted = pt.get<bool>("splot_boosted", false);
129  m_splot_variable = pt.get<std::string>("splot_variable", "");
130 
131  unsigned int splot_number_of_mc_files = pt.get<unsigned int>("splot_number_of_mc_files", 0);
132  m_splot_mc_files.resize(splot_number_of_mc_files);
133  for (unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
134  m_splot_mc_files[i] = pt.get<std::string>(std::string("splot_mc_file") + std::to_string(i));
135  }
136 
137  m_use_sideband_subtraction = pt.get<bool>("use_sideband_subtraction");
138  m_sideband_variable = pt.get<std::string>("sideband_variable");
139 
140  unsigned int sideband_number_of_mc_files = pt.get<unsigned int>("sideband_number_of_mc_files", 0);
141  m_sideband_mc_files.resize(sideband_number_of_mc_files);
142  for (unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
143  m_sideband_mc_files[i] = pt.get<std::string>(std::string("sideband_mc_file") + std::to_string(i));
144  }
145 
146  m_use_reweighting = pt.get<bool>("use_reweighting", false);
147  m_reweighting_variable = pt.get<std::string>("reweighting_variable");
148  m_reweighting_identifier = pt.get<std::string>("reweighting_identifier");
149 
150  unsigned int reweighting_number_of_mc_files = pt.get<unsigned int>("reweighting_number_of_mc_files", 0);
151  m_reweighting_mc_files.resize(reweighting_number_of_mc_files);
152  for (unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
153  m_reweighting_mc_files[i] = pt.get<std::string>(std::string("reweighting_mc_file") + std::to_string(i));
154  }
155 
156  unsigned int reweighting_number_of_data_files = pt.get<unsigned int>("reweighting_number_of_data_files", 0);
157  m_reweighting_data_files.resize(reweighting_number_of_data_files);
158  for (unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
159  m_reweighting_data_files[i] = pt.get<std::string>(std::string("reweighting_data_file") + std::to_string(i));
160  }
161 
162  }
163 
164  void MetaOptions::save(boost::property_tree::ptree& pt) const
165  {
166  pt.put("use_splot", m_use_splot);
167  pt.put("splot_variable", m_splot_variable);
168  pt.put("splot_combined", m_splot_combined);
169  pt.put("splot_boosted", m_splot_boosted);
170 
171  pt.put("splot_number_of_mc_files", m_splot_mc_files.size());
172  for (unsigned int i = 0; i < m_splot_mc_files.size(); ++i) {
173  pt.put(std::string("splot_mc_file") + std::to_string(i), m_splot_mc_files[i]);
174  }
175 
176  pt.put("use_sideband_subtraction", m_use_sideband_subtraction);
177  pt.put("sideband_variable", m_sideband_variable);
178 
179  pt.put("sideband_number_of_mc_files", m_sideband_mc_files.size());
180  for (unsigned int i = 0; i < m_sideband_mc_files.size(); ++i) {
181  pt.put(std::string("sideband_mc_file") + std::to_string(i), m_sideband_mc_files[i]);
182  }
183 
184  pt.put("use_reweighting", m_use_reweighting);
185  pt.put("reweighting_variable", m_reweighting_variable);
186  pt.put("reweighting_identifier", m_reweighting_identifier);
187 
188  pt.put("reweighting_number_of_mc_files", m_reweighting_mc_files.size());
189  for (unsigned int i = 0; i < m_reweighting_mc_files.size(); ++i) {
190  pt.put(std::string("reweighting_mc_file") + std::to_string(i), m_reweighting_mc_files[i]);
191  }
192 
193  pt.put("reweighting_number_of_data_files", m_reweighting_data_files.size());
194  for (unsigned int i = 0; i < m_reweighting_data_files.size(); ++i) {
195  pt.put(std::string("reweighting_data_file") + std::to_string(i), m_reweighting_data_files[i]);
196  }
197 
198  }
199 
200  }
202 }
std::vector< std::string > m_datafiles
Name of the datafiles containing the training data.
Definition: Options.h:84
int m_signal_class
Signal class which is used as signal in a classification problem.
Definition: Options.h:88
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
Definition: Options.h:86
std::string m_weight_variable
Weight variable (branch name) defining the weights.
Definition: Options.h:90
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
Definition: Options.h:87
std::string m_method
Name of the MVA method to use.
Definition: Options.h:82
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Definition: Options.h:91
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:19
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:42
std::string m_treename
Name of the TTree inside the datafile containing the training data.
Definition: Options.h:85
std::string m_target_variable
Target variable (branch name) defining the target.
Definition: Options.h:89
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:71
std::string m_identifier
Identifier containing the finished training.
Definition: Options.h:83
std::string m_reweighting_variable
Variable defining for which events the reweighting should be used (1) or not used (0).
Definition: Options.h:143
bool m_use_reweighting
Use a pretraining of data against mc and weight the mc afterwards.
Definition: Options.h:141
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:97
bool m_use_splot
Use splot training.
Definition: Options.h:130
std::string m_splot_variable
Discriminating variable.
Definition: Options.h:131
std::vector< std::string > m_reweighting_mc_files
MC files for the pretraining.
Definition: Options.h:146
std::vector< std::string > m_reweighting_data_files
Data files for the pretraining.
Definition: Options.h:145
bool m_splot_combined
Combine sPlot training with PDF classifier for discriminating variable.
Definition: Options.h:133
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:124
std::string m_reweighting_identifier
Identifier used to save the reweighting expert.
Definition: Options.h:142
bool m_splot_boosted
Use boosted sPlot training (aPlot)
Definition: Options.h:134
std::vector< std::string > m_splot_mc_files
Monte carlo files used for the distribution of the discriminating variable.
Definition: Options.h:132
std::string m_sideband_variable
Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction.
Definition: Options.h:138
std::vector< std::string > m_sideband_mc_files
Monte carlo files used to estimate the number of events in the different regions.
Definition: Options.h:137
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:164
bool m_use_sideband_subtraction
Use sideband subtraction.
Definition: Options.h:136
Abstract base class for different kinds of events.