9 #include <mva/interface/Options.h>
10 #include <boost/property_tree/ptree.hpp>
21 po::options_description description(
"General options");
22 description.add_options()
23 (
"help",
"print this message")
24 (
"datafiles", po::value<std::vector<std::string>>(&
m_datafiles)->required()->multitoken(),
25 "ROOT files containing the training dataset")
26 (
"treename", po::value<std::string>(&
m_treename),
"Name of tree in ROOT datafile")
27 (
"identifier", po::value<std::string>(&
m_identifier)->required(),
"Identifier of the outputted weightfile")
28 (
"variables", po::value<std::vector<std::string>>(&
m_variables)->required()->multitoken(),
29 "feature variables used in the training")
30 (
"spectators", po::value<std::vector<std::string>>(&
m_spectators)->multitoken(),
31 "spectator variables used in the training")
33 "target variable used to distinguish between signal and background, isSignal is used as default.")
34 (
"signal_class", po::value<int>(&
m_signal_class),
"integer which identifies signal events")
35 (
"weight_variable", po::value<std::string>(&
m_weight_variable),
"weight variable used to weight each event")
36 (
"max_events", po::value<unsigned int>(&
m_max_events),
"maximum number of events to process, 0 means all")
37 (
"method", po::value<std::string>(&
m_method)->required(),
38 "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
44 m_method = pt.get<std::string>(
"method");
52 unsigned int numberOfFiles = pt.get<
unsigned int>(
"number_data_files", 0);
54 for (
unsigned int i = 0; i < numberOfFiles; ++i) {
55 m_datafiles[i] = pt.get<std::string>(std::string(
"datafile") + std::to_string(i));
58 unsigned int numberOfSpectators = pt.get<
unsigned int>(
"number_spectator_variables", 0u);
60 for (
unsigned int i = 0; i < numberOfSpectators; ++i) {
61 m_spectators[i] = pt.get<std::string>(std::string(
"spectator") + std::to_string(i));
64 auto numberOfFeatures = pt.get<
unsigned int>(
"number_feature_variables");
66 for (
unsigned int i = 0; i < numberOfFeatures; ++i) {
67 m_variables[i] = pt.get<std::string>(std::string(
"variable") + std::to_string(i));
81 pt.put(
"number_feature_variables",
m_variables.size());
82 for (
unsigned int i = 0; i <
m_variables.size(); ++i) {
83 pt.put(std::string(
"variable") + std::to_string(i),
m_variables[i]);
86 pt.put(
"number_spectator_variables",
m_spectators.size());
88 pt.put(std::string(
"spectator") + std::to_string(i),
m_spectators[i]);
92 for (
unsigned int i = 0; i <
m_datafiles.size(); ++i) {
93 pt.put(std::string(
"datafile") + std::to_string(i),
m_datafiles[i]);
99 po::options_description description(
"Meta options");
100 description.add_options()
101 (
"use_splot", po::value<bool>(&
m_use_splot),
"whether to do an splot training")
102 (
"splot_variable", po::value<std::string>(&
m_splot_variable),
"Variable used as discriminating variable in sPlot training")
103 (
"splot_mc_files", po::value<std::vector<std::string>>(&
m_splot_mc_files)->multitoken(),
104 "Monte carlo files containing the discriminant variable with the mc truth")
105 (
"splot_combined", po::value<bool>(&
m_splot_combined),
"Combine sPlot training with PDF classifier for discriminating variable")
106 (
"splot_boosted", po::value<bool>(&
m_splot_boosted),
"Use boosted sPlot training (aPlot)")
108 (
"sideband_mc_files", po::value<std::vector<std::string>>(&
m_sideband_mc_files)->multitoken(),
109 "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
111 "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction")
112 (
"use_reweighting", po::value<bool>(&
m_use_reweighting),
"whether to do a reweighting pre training")
114 "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
116 "Identifier used to save the reweighting expert.")
118 "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
120 "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
131 unsigned int splot_number_of_mc_files = pt.get<
unsigned int>(
"splot_number_of_mc_files", 0);
133 for (
unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
134 m_splot_mc_files[i] = pt.get<std::string>(std::string(
"splot_mc_file") + std::to_string(i));
140 unsigned int sideband_number_of_mc_files = pt.get<
unsigned int>(
"sideband_number_of_mc_files", 0);
142 for (
unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
143 m_sideband_mc_files[i] = pt.get<std::string>(std::string(
"sideband_mc_file") + std::to_string(i));
150 unsigned int reweighting_number_of_mc_files = pt.get<
unsigned int>(
"reweighting_number_of_mc_files", 0);
152 for (
unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
156 unsigned int reweighting_number_of_data_files = pt.get<
unsigned int>(
"reweighting_number_of_data_files", 0);
158 for (
unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
173 pt.put(std::string(
"splot_mc_file") + std::to_string(i),
m_splot_mc_files[i]);
std::vector< std::string > m_datafiles
Names of the datafiles containing the training data.
int m_signal_class
Signal class which is used as signal in a classification problem.
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
std::string m_weight_variable
Weight variable (branch name) defining the weights.
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
std::string m_method
Name of the MVA method to use.
unsigned int m_max_events
Maximum number of events to process, 0 means all.
virtual po::options_description getDescription() override
Returns a program options description for all available options.
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from an XML tree.
std::string m_treename
Name of the TTree inside the datafile containing the training data.
std::string m_target_variable
Target variable (branch name) defining the target.
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in an XML tree.
std::string m_identifier
Identifier of the weightfile containing the finished training.
Abstract base class for different kinds of events.