9#include <mva/interface/Options.h>
10#include <boost/property_tree/ptree.hpp>
// Declares the "General options" group of program options.
// Every option that carries a value is bound through po::value<T>(&member), so
// Boost.Program_options stores the parsed value in that member.
// ->required() marks an option as mandatory; ->multitoken() lets one option
// take several values.
// NOTE(review): only part of the enclosing function is visible in this listing
// (presumably getDescription()); its signature and return are not shown.
21 po::options_description description(
"General options");
22 description.add_options()
// Plain flag: no value is attached to it.
23 (
"help",
"print this message")
// Mandatory list of input files.
24 (
"datafiles", po::value<std::vector<std::string>>(&
m_datafiles)->required()->multitoken(),
25 "ROOT files containing the training dataset")
26 (
"treename", po::value<std::string>(&
m_treename),
"Name of tree in ROOT datafile")
// Mandatory identifier.
27 (
"identifier", po::value<std::string>(&
m_identifier)->required(),
"Identifier of the outputted weightfile")
// Mandatory list of feature variables.
28 (
"variables", po::value<std::vector<std::string>>(&
m_variables)->required()->multitoken(),
29 "feature variables used in the training")
// Optional list of spectator variables.
30 (
"spectators", po::value<std::vector<std::string>>(&
m_spectators)->multitoken(),
31 "spectator variables used in the training")
// NOTE(review): the declaration line that belongs to the next description is
// not visible in this listing — presumably the target-variable option; confirm
// against the full file.
33 "target variable used to distinguish between signal and background, isSignal is used as default.")
34 (
"signal_class", po::value<int>(&
m_signal_class),
"integer which identifies signal events")
35 (
"nClasses", po::value<unsigned int>(&
m_nClasses),
36 "number of classes under consideration. Must be supplied for multiclass classifications. Not all methods support multiclass classification.")
37 (
"weight_variable", po::value<std::string>(&
m_weight_variable),
"weight variable used to weight each event")
38 (
"max_events", po::value<unsigned int>(&
m_max_events),
"maximum number of events to process, 0 means all")
// Mandatory method name; the help text lists the accepted values.
39 (
"method", po::value<std::string>(&
m_method)->required(),
40 "MVA Method [FastBDT|TMVAClassification|TMVARegression|Python|FANN|]");
// Restores option members from the property tree `pt`.
// pt.get<T>(key) without a default throws if the key is absent; the two-argument
// form returns the supplied default instead.
// NOTE(review): this is an excerpt — the function signature, several
// assignments and the closing braces of the loops are not visible here.
46 m_method = pt.get<std::string>(
"method");
// Falls back to 2 classes when the tree carries no "nClasses" entry.
53 m_nClasses = pt.get<
unsigned int>(
"nClasses", 2u);
// Data files are stored as a count plus indexed keys "datafile0", "datafile1", ...
55 unsigned int numberOfFiles = pt.get<
unsigned int>(
"number_data_files", 0);
// NOTE(review): elements are assigned by index, so m_datafiles must already
// hold numberOfFiles entries; the sizing is not visible in this listing — confirm.
57 for (
unsigned int i = 0; i < numberOfFiles; ++i) {
58 m_datafiles[i] = pt.get<std::string>(std::string(
"datafile") + std::to_string(i));
// Spectators use the same scheme with keys "spectator<i>"; the count defaults to 0.
61 unsigned int numberOfSpectators = pt.get<
unsigned int>(
"number_spectator_variables", 0u);
// NOTE(review): same indexing precondition as above, for m_spectators.
63 for (
unsigned int i = 0; i < numberOfSpectators; ++i) {
64 m_spectators[i] = pt.get<std::string>(std::string(
"spectator") + std::to_string(i));
// Unlike the two counts above, the feature count has no default, so a tree
// without "number_feature_variables" makes this call throw.
67 auto numberOfFeatures = pt.get<
unsigned int>(
"number_feature_variables");
// NOTE(review): same indexing precondition as above, for m_variables.
69 for (
unsigned int i = 0; i < numberOfFeatures; ++i) {
70 m_variables[i] = pt.get<std::string>(std::string(
"variable") + std::to_string(i));
// Writes the list-valued options into the property tree `pt` as a count plus
// one indexed key per element ("variable0", "variable1", ...).
// NOTE(review): this is an excerpt — the function signature, some statements
// and the closing braces of the loops are not visible here.
85 pt.put(
"number_feature_variables",
m_variables.size());
86 for (
unsigned int i = 0; i <
m_variables.size(); ++i) {
87 pt.put(std::string(
"variable") + std::to_string(i),
m_variables[i]);
// Spectators: count first, then the indexed entries.
90 pt.put(
"number_spectator_variables",
m_spectators.size());
// NOTE(review): the loop header for the following statement is not visible in
// this listing; presumably it iterates over m_spectators — confirm.
92 pt.put(std::string(
"spectator") + std::to_string(i),
m_spectators[i]);
// Data files, stored under "datafile<i>".
// NOTE(review): the statement writing the data-file count is not visible here.
96 for (
unsigned int i = 0; i <
m_datafiles.size(); ++i) {
97 pt.put(std::string(
"datafile") + std::to_string(i),
m_datafiles[i]);
// Declares the "Meta options" group of program options (sPlot, sideband
// subtraction and reweighting settings, per the help texts below).
// Every visible option is bound through po::value<T>(&member), so
// Boost.Program_options stores the parsed value in that member; ->multitoken()
// lets one option take several values. None of the visible options is required.
// NOTE(review): only part of the enclosing function is visible in this listing;
// its signature and return are not shown.
103 po::options_description description(
"Meta options");
104 description.add_options()
105 (
"use_splot", po::value<bool>(&
m_use_splot),
"whether to do an splot training")
106 (
"splot_variable", po::value<std::string>(&
m_splot_variable),
"Variable used as discriminating variable in sPlot training")
107 (
"splot_mc_files", po::value<std::vector<std::string>>(&
m_splot_mc_files)->multitoken(),
108 "Monte carlo files containing the discriminant variable with the mc truth")
109 (
"splot_combined", po::value<bool>(&
m_splot_combined),
"Combine sPlot training with PDF classifier for discriminating variable")
110 (
"splot_boosted", po::value<bool>(&
m_splot_boosted),
"Use boosted sPlot training (aPlot)")
112 (
"sideband_mc_files", po::value<std::vector<std::string>>(&
m_sideband_mc_files)->multitoken(),
113 "Monte carlo files used to estimate the number of events in the different regions. (Must contain the same signal / background distribution as is expected in data)")
// NOTE(review): the declaration line that belongs to the next description is
// not visible in this listing — presumably a sideband-variable option; confirm.
115 "Variable defining the signal region (1) background region (2) negative signal region (3) or unused (otherwise) for the sideband subtraction")
116 (
"use_reweighting", po::value<bool>(&
m_use_reweighting),
"whether to do a reweighting pre training")
// NOTE(review): the declaration lines for the four descriptions below are not
// visible in this listing. Judging by the texts they cover a reweighting
// variable, a reweighting identifier, and the reweighting MC and data file
// lists — confirm against the full file.
118 "Variable defining for which events the reweighting should be used (1) or not used (0). If empty the reweighting is applied to all events")
120 "Identifier used to save the reweighting expert.")
122 "Monte carlo files for the reweighting pretraining (Must contain the same luminosity as the given data files)")
124 "Data files for the reweighting pretraining (Must contain the same luminosity as the given MC files)");
// Restores the file lists of the meta options from the property tree `pt`.
// Each list is stored as a count (defaulting to 0 when the key is absent) plus
// one indexed key per element.
// NOTE(review): this is an excerpt — the function signature, several
// statements and the closing braces of the loops are not visible here.
135 unsigned int splot_number_of_mc_files = pt.get<
unsigned int>(
"splot_number_of_mc_files", 0);
// NOTE(review): elements are assigned by index, so m_splot_mc_files must
// already hold splot_number_of_mc_files entries; the sizing is not visible — confirm.
137 for (
unsigned int i = 0; i < splot_number_of_mc_files; ++i) {
138 m_splot_mc_files[i] = pt.get<std::string>(std::string(
"splot_mc_file") + std::to_string(i));
144 unsigned int sideband_number_of_mc_files = pt.get<
unsigned int>(
"sideband_number_of_mc_files", 0);
// NOTE(review): same indexing precondition as above, for m_sideband_mc_files.
146 for (
unsigned int i = 0; i < sideband_number_of_mc_files; ++i) {
147 m_sideband_mc_files[i] = pt.get<std::string>(std::string(
"sideband_mc_file") + std::to_string(i));
154 unsigned int reweighting_number_of_mc_files = pt.get<
unsigned int>(
"reweighting_number_of_mc_files", 0);
// NOTE(review): the body of this loop is not visible in this listing.
156 for (
unsigned int i = 0; i < reweighting_number_of_mc_files; ++i) {
160 unsigned int reweighting_number_of_data_files = pt.get<
unsigned int>(
"reweighting_number_of_data_files", 0);
// NOTE(review): the body of this loop is not visible in this listing.
162 for (
unsigned int i = 0; i < reweighting_number_of_data_files; ++i) {
// Stores one sPlot MC file name under the indexed key "splot_mc_file<i>".
// NOTE(review): the loop that defines `i` is not visible in this listing;
// presumably it iterates over m_splot_mc_files — confirm.
177 pt.put(std::string(
"splot_mc_file") + std::to_string(i),
m_splot_mc_files[i]);
std::vector< std::string > m_datafiles
Names of the data files containing the training data.
int m_signal_class
Signal class which is used as signal in a classification problem.
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
std::string m_weight_variable
Weight variable (branch name) defining the weights.
std::vector< std::string > m_spectators
Vector of all spectators (branch names) used in the training.
std::string m_method
Name of the MVA method to use.
unsigned int m_max_events
Maximum number of events to process, 0 means all.
virtual po::options_description getDescription() override
Returns a program options description for all available options.
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
std::string m_treename
Name of the TTree inside the datafile containing the training data.
std::string m_target_variable
Target variable (branch name) defining the target.
unsigned int m_nClasses
Number of classes in a classification problem.
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
std::string m_identifier
Identifier of the weightfile containing the finished training.
Abstract base class for different kinds of events.