Belle II Software  release-05-01-25
Options.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2016 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Thomas Keck *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <mva/interface/Options.h>
12 #include <boost/property_tree/ptree.hpp>
13 
14 namespace Belle2 {
19  namespace MVA {
20 
21  po::options_description GeneralOptions::getDescription()
22  {
23  po::options_description description("General options");
24  description.add_options()
25  ("help", "print this message")
26  ("datafiles", po::value<std::vector<std::string>>(&m_datafiles)->required()->multitoken(),
27  "ROOT files containing the training dataset")
28  ("treename", po::value<std::string>(&m_treename), "Name of tree in ROOT datafile")
29  ("identifier", po::value<std::string>(&m_identifier)->required(), "Identifier of the outputted weightfile")
30  ("variables", po::value<std::vector<std::string>>(&m_variables)->required()->multitoken(),
31  "feature variables used in the training")
32  ("spectators", po::value<std::vector<std::string>>(&m_spectators)->multitoken(),
33  "spectator variables used in the training")
34  ("target_variable", po::value<std::string>(&m_target_variable),
35  "target variable used to distinguish between signal and background, isSignal is used as default.")
36  ("signal_class", po::value<int>(&m_signal_class), "integer which identifies signal events")
37  ("weight_variable", po::value<std::string>(&m_weight_variable), "weight variable used to weight each event")
38  ("max_events", po::value<unsigned int>(&m_max_events), "maximum number of events to process, 0 means all")
39  ("method", po::value<std::string>(&m_method)->required(),
40  "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
41  return description;
42  }
43 
44  void GeneralOptions::load(const boost::property_tree::ptree& pt)
45  {
46  m_method = pt.get<std::string>("method");
47  m_identifier = pt.get<std::string>("weightfile");
48  m_treename = pt.get<std::string>("treename");
49  m_target_variable = pt.get<std::string>("target_variable");
50  m_weight_variable = pt.get<std::string>("weight_variable");
51  m_signal_class = pt.get<int>("signal_class");
52  m_max_events = pt.get<unsigned int>("max_events", 0u);
53 
54  unsigned int numberOfFiles = pt.get<unsigned int>("number_data_files", 0);
55  m_datafiles.resize(numberOfFiles);
56  for (unsigned int i = 0; i < numberOfFiles; ++i) {
57  m_datafiles[i] = pt.get<std::string>(std::string("datafile") + std::to_string(i));
58  }
59 
60  unsigned int numberOfSpectators = pt.get<unsigned int>("number_spectator_variables", 0u);
61  m_spectators.resize(numberOfSpectators);
62  for (unsigned int i = 0; i < numberOfSpectators; ++i) {
63  m_spectators[i] = pt.get<std::string>(std::string("spectator") + std::to_string(i));
64  }
65 
66  auto numberOfFeatures = pt.get<unsigned int>("number_feature_variables");
67  m_variables.resize(numberOfFeatures);
68  for (unsigned int i = 0; i < numberOfFeatures; ++i) {
69  m_variables[i] = pt.get<std::string>(std::string("variable") + std::to_string(i));
70  }
71  }
72 
73  void GeneralOptions::save(boost::property_tree::ptree& pt) const
74  {
75  pt.put("method", m_method);
76  pt.put("weightfile", m_identifier);
77  pt.put("treename", m_treename);
78  pt.put("target_variable", m_target_variable);
79  pt.put("weight_variable", m_weight_variable);
80  pt.put("signal_class", m_signal_class);
81  pt.put("max_events", m_max_events);
82 
83  pt.put("number_feature_variables", m_variables.size());
84  for (unsigned int i = 0; i < m_variables.size(); ++i) {
85  pt.put(std::string("variable") + std::to_string(i), m_variables[i]);
86  }
87 
88  pt.put("number_spectator_variables", m_spectators.size());
89  for (unsigned int i = 0; i < m_spectators.size(); ++i) {
90  pt.put(std::string("spectator") + std::to_string(i), m_spectators[i]);
91  }
92 
93  pt.put("number_data_files", m_datafiles.size());
94  for (unsigned int i = 0; i < m_datafiles.size(); ++i) {
95  pt.put(std::string("datafile") + std::to_string(i), m_datafiles[i]);
96  }
97  }
98 
99  po::options_description MetaOptions::getDescription()
100  {
101  po::options_description description("Meta options");
102  description.add_options()
103  ("use_splot", po::value<bool>(&m_use_splot), "whether to do an splot training")
104  ("splot_variable", po::value<std::string>(&m_splot_variable), "Variable used as discriminating variable in sPlot training")
105  ("splot_mc_files", po::value<std::vector<std::string>>(&m_splot_mc_files)->multitoken(),
106  "Monte carlo files containing the discriminant variable with the mc truth")
107  ("splot_combined", po::value<bool>(&m_splot_combined), "Combine sPlot training with PDF classifier for discriminating variable")
108  ("splot_boosted", po::value<bool>(&m_splot_boosted), "Use boosted sPlot training (aPlot)")
109  ("use_sideband_substraction", po::value<bool>(&m_use_sideband_substraction), "whether to do a sideband substraction training")
110  ("sideband_mc_files", po::value<std::vector<std::string>>(&m_sideband_mc_files)->multitoken(),
111  "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
112  ("sideband_variable", po::value<std::string>(&m_sideband_variable),
113  "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband substraction")
114  ("use_reweighting", po::value<bool>(&m_use_reweighting), "whether to do a reweighting pre training")
115  ("reweighting_variable", po::value<std::string>(&m_reweighting_variable),
116  "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
117  ("reweighting_identifier", po::value<std::string>(&m_reweighting_identifier),
118  "Identifier used to save the reweighting expert.")
119  ("reweighting_mc_files", po::value<std::vector<std::string>>(&m_reweighting_mc_files)->multitoken(),
120  "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
121  ("reweighting_data_files", po::value<std::vector<std::string>>(&m_reweighting_data_files)->multitoken(),
122  "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
123  return description;
124  }
125 
126  void MetaOptions::load(const boost::property_tree::ptree& pt)
127  {
128  m_use_splot = pt.get<bool>("use_splot", false);
129  m_splot_combined = pt.get<bool>("splot_combined", false);
130  m_splot_boosted = pt.get<bool>("splot_boosted", false);
131  m_splot_variable = pt.get<std::string>("splot_variable", "");
132 
133  unsigned int splot_number_of_mc_files = pt.get<unsigned int>("splot_number_of_mc_files", 0);
134  m_splot_mc_files.resize(splot_number_of_mc_files);
135  for (unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
136  m_splot_mc_files[i] = pt.get<std::string>(std::string("splot_mc_file") + std::to_string(i));
137  }
138 
139  m_use_sideband_substraction = pt.get<bool>("use_sideband_substraction");
140  m_sideband_variable = pt.get<std::string>("sideband_variable");
141 
142  unsigned int sideband_number_of_mc_files = pt.get<unsigned int>("sideband_number_of_mc_files", 0);
143  m_sideband_mc_files.resize(sideband_number_of_mc_files);
144  for (unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
145  m_sideband_mc_files[i] = pt.get<std::string>(std::string("sideband_mc_file") + std::to_string(i));
146  }
147 
148  m_use_reweighting = pt.get<bool>("use_reweighting", false);
149  m_reweighting_variable = pt.get<std::string>("reweighting_variable");
150  m_reweighting_identifier = pt.get<std::string>("reweighting_identifier");
151 
152  unsigned int reweighting_number_of_mc_files = pt.get<unsigned int>("reweighting_number_of_mc_files", 0);
153  m_reweighting_mc_files.resize(reweighting_number_of_mc_files);
154  for (unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
155  m_reweighting_mc_files[i] = pt.get<std::string>(std::string("reweighting_mc_file") + std::to_string(i));
156  }
157 
158  unsigned int reweighting_number_of_data_files = pt.get<unsigned int>("reweighting_number_of_data_files", 0);
159  m_reweighting_data_files.resize(reweighting_number_of_data_files);
160  for (unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
161  m_reweighting_data_files[i] = pt.get<std::string>(std::string("reweighting_data_file") + std::to_string(i));
162  }
163 
164  }
165 
166  void MetaOptions::save(boost::property_tree::ptree& pt) const
167  {
168  pt.put("use_splot", m_use_splot);
169  pt.put("splot_variable", m_splot_variable);
170  pt.put("splot_combined", m_splot_combined);
171  pt.put("splot_boosted", m_splot_boosted);
172 
173  pt.put("splot_number_of_mc_files", m_splot_mc_files.size());
174  for (unsigned int i = 0; i < m_splot_mc_files.size(); ++i) {
175  pt.put(std::string("splot_mc_file") + std::to_string(i), m_splot_mc_files[i]);
176  }
177 
178  pt.put("use_sideband_substraction", m_use_sideband_substraction);
179  pt.put("sideband_variable", m_sideband_variable);
180 
181  pt.put("sideband_number_of_mc_files", m_sideband_mc_files.size());
182  for (unsigned int i = 0; i < m_sideband_mc_files.size(); ++i) {
183  pt.put(std::string("sideband_mc_file") + std::to_string(i), m_sideband_mc_files[i]);
184  }
185 
186  pt.put("use_reweighting", m_use_reweighting);
187  pt.put("reweighting_variable", m_reweighting_variable);
188  pt.put("reweighting_identifier", m_reweighting_identifier);
189 
190  pt.put("reweighting_number_of_mc_files", m_reweighting_mc_files.size());
191  for (unsigned int i = 0; i < m_reweighting_mc_files.size(); ++i) {
192  pt.put(std::string("reweighting_mc_file") + std::to_string(i), m_reweighting_mc_files[i]);
193  }
194 
195  pt.put("reweighting_number_of_data_files", m_reweighting_data_files.size());
196  for (unsigned int i = 0; i < m_reweighting_data_files.size(); ++i) {
197  pt.put(std::string("reweighting_data_file") + std::to_string(i), m_reweighting_data_files[i]);
198  }
199 
200  }
201 
202  }
204 }
Belle2::MVA::GeneralOptions::m_identifier
std::string m_identifier
Identifier containing the finished training.
Definition: Options.h:85
Belle2::MVA::MetaOptions::m_use_sideband_substraction
bool m_use_sideband_substraction
Use sideband subtraction.
Definition: Options.h:138
Belle2::MVA::MetaOptions::m_reweighting_data_files
std::vector< std::string > m_reweighting_data_files
Data files for the pretraining.
Definition: Options.h:147
Belle2::MVA::MetaOptions::m_splot_boosted
bool m_splot_boosted
Use boosted sPlot training (aPlot)
Definition: Options.h:136
Belle2::MVA::MetaOptions::load
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:134
Belle2::MVA::GeneralOptions::m_max_events
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Definition: Options.h:93
Belle2::MVA::MetaOptions::m_reweighting_mc_files
std::vector< std::string > m_reweighting_mc_files
MC files for the pretraining.
Definition: Options.h:148
Belle2::MVA::MetaOptions::m_sideband_mc_files
std::vector< std::string > m_sideband_mc_files
used to estimate the number of events in the different regions
Definition: Options.h:139
Belle2::MVA::MetaOptions::m_reweighting_variable
std::string m_reweighting_variable
Variable defining for which events the reweighting should be used (1) or not used (0).
Definition: Options.h:145
Belle2::MVA::GeneralOptions::load
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: Options.cc:52
Belle2::MVA::GeneralOptions::m_weight_variable
std::string m_weight_variable
Weight variable (branch name) defining the weights.
Definition: Options.h:92
Belle2::MVA::MetaOptions::m_splot_variable
std::string m_splot_variable
Discriminating variable.
Definition: Options.h:133
Belle2::MVA::MetaOptions::getDescription
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:107
Belle2::MVA::GeneralOptions::getDescription
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Options.cc:29
Belle2::MVA::MetaOptions::m_splot_mc_files
std::vector< std::string > m_splot_mc_files
Monte carlo files used for the distribution of the discriminating variable.
Definition: Options.h:134
Belle2::MVA::MetaOptions::save
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:174
Belle2::MVA::MetaOptions::m_reweighting_identifier
std::string m_reweighting_identifier
Identifier used to save the reweighting expert.
Definition: Options.h:144
Belle2::MVA::GeneralOptions::m_treename
std::string m_treename
Name of the TTree inside the datafile containing the training data.
Definition: Options.h:87
Belle2::MVA::GeneralOptions::m_spectators
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
Definition: Options.h:89
Belle2::MVA::GeneralOptions::m_method
std::string m_method
Name of the MVA method to use.
Definition: Options.h:84
Belle2::MVA::MetaOptions::m_use_reweighting
bool m_use_reweighting
Use a pretraining of data against mc and weight the mc afterwards.
Definition: Options.h:143
Belle2::MVA::MetaOptions::m_use_splot
bool m_use_splot
Use splot training.
Definition: Options.h:132
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::GeneralOptions::m_target_variable
std::string m_target_variable
Target variable (branch name) defining the target.
Definition: Options.h:91
Belle2::MVA::GeneralOptions::m_signal_class
int m_signal_class
Signal class which is used as signal in a classification problem.
Definition: Options.h:90
Belle2::MVA::GeneralOptions::m_variables
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
Definition: Options.h:88
Belle2::MVA::GeneralOptions::m_datafiles
std::vector< std::string > m_datafiles
Name of the datafiles containing the training data.
Definition: Options.h:86
Belle2::MVA::MetaOptions::m_splot_combined
bool m_splot_combined
Combine sPlot training with PDF classifier for discriminating variable.
Definition: Options.h:135
Belle2::MVA::MetaOptions::m_sideband_variable
std::string m_sideband_variable
Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction.
Definition: Options.h:140
Belle2::MVA::GeneralOptions::save
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: Options.cc:81