122 {
123 if (training_data.getNumberOfEvents() > 5e+6) {
124 B2WARNING("Number of events for training exceeds 5 million. FastBDT performance starts getting worse when the number reaches O(10^7).");
125 }
126
127 unsigned int numberOfFeatures = training_data.getNumberOfFeatures();
128 unsigned int numberOfSpectators = training_data.getNumberOfSpectators();
129
132 B2ERROR("You provided individual nCut values for each feature and spectator, but the total number of provided cuts is not same as as the total number of features and spectators.");
133 }
134
137 if (individualPurityTransformation.size() == 0) {
138 for (unsigned int i = 0; i < numberOfFeatures; ++i) {
139 individualPurityTransformation.push_back(true);
140 }
141 }
142 }
143
145 if (individual_nCuts.size() == 0) {
146 for (unsigned int i = 0; i < numberOfFeatures + numberOfSpectators; ++i) {
148 }
149 }
150
154 numberOfSpectators, true);
155
156 std::vector<std::vector<float>> X(numberOfFeatures + numberOfSpectators);
157 const auto& y = training_data.getSignals();
158 if (not isValidSignal(y)) {
159 B2FATAL("The training data is not valid. It only contains one class instead of two.");
160 }
161 const auto& w = training_data.getWeights();
162 for (unsigned int i = 0; i < numberOfFeatures; ++i) {
163 X[i] = training_data.getFeature(i);
164 }
165 for (unsigned int i = 0; i < numberOfSpectators; ++i) {
166 X[i + numberOfFeatures] = training_data.getSpectator(i);
167 }
168 classifier.fit(X, y, w);
169
170 Weightfile weightfile;
171 std::string custom_weightfile = weightfile.generateFileName();
172 std::fstream file(custom_weightfile, std::ios_base::out | std::ios_base::trunc);
173
174 file << classifier << std::endl;
175 file.close();
176
179 weightfile.addFile("FastBDT_Weightfile", custom_weightfile);
180 weightfile.addSignalFraction(training_data.getSignalFraction());
181
182 std::map<std::string, float> importance;
183 for (auto& pair : classifier.GetVariableRanking()) {
185 }
186 weightfile.addFeatureImportance(importance);
187
188 return weightfile;
189
190 }
std::vector< unsigned int > m_individual_nCuts
Number of cut Levels = log_2(Number of Cuts) for each provided feature.
bool m_sPlot
Activates sPlot sampling.
double m_randRatio
Fraction of data to use in the stochastic training.
double m_flatnessLoss
Flatness Loss constant.
double m_shrinkage
Shrinkage during the boosting step.
bool m_purityTransformation
Activates purity transformation globally for all features.
unsigned int m_nLevels
Depth of tree.
std::vector< bool > m_individualPurityTransformation
Vector which decides for each feature individually whether the purity transformation should be used.
unsigned int m_nCuts
Number of cut Levels = log_2(Number of Cuts).
unsigned int m_nTrees
Number of trees.
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
GeneralOptions m_general_options
GeneralOptions containing all shared options.