122 {
123
124 unsigned int numberOfFeatures = training_data.getNumberOfFeatures();
125 unsigned int numberOfSpectators = training_data.getNumberOfSpectators();
126
129 B2ERROR("You provided individual nCut values for each feature and spectator, but the total number of provided cuts is not same as as the total number of features and spectators.");
130 }
131
134 if (individualPurityTransformation.size() == 0) {
135 for (unsigned int i = 0; i < numberOfFeatures; ++i) {
136 individualPurityTransformation.push_back(true);
137 }
138 }
139 }
140
142 if (individual_nCuts.size() == 0) {
143 for (unsigned int i = 0; i < numberOfFeatures + numberOfSpectators; ++i) {
145 }
146 }
147
151 numberOfSpectators, true);
152
153 std::vector<std::vector<float>> X(numberOfFeatures + numberOfSpectators);
154 const auto& y = training_data.getSignals();
155 if (not isValidSignal(y)) {
156 B2FATAL("The training data is not valid. It only contains one class instead of two.");
157 }
158 const auto& w = training_data.getWeights();
159 for (unsigned int i = 0; i < numberOfFeatures; ++i) {
160 X[i] = training_data.getFeature(i);
161 }
162 for (unsigned int i = 0; i < numberOfSpectators; ++i) {
163 X[i + numberOfFeatures] = training_data.getSpectator(i);
164 }
165 classifier.fit(X, y, w);
166
167 Weightfile weightfile;
168 std::string custom_weightfile = weightfile.generateFileName();
169 std::fstream file(custom_weightfile, std::ios_base::out | std::ios_base::trunc);
170
171 file << classifier << std::endl;
172 file.close();
173
176 weightfile.addFile("FastBDT_Weightfile", custom_weightfile);
177 weightfile.addSignalFraction(training_data.getSignalFraction());
178
179 std::map<std::string, float> importance;
180 for (auto& pair : classifier.GetVariableRanking()) {
182 }
183 weightfile.addFeatureImportance(importance);
184
185 return weightfile;
186
187 }
std::vector< unsigned int > m_individual_nCuts
Number of cut levels (log_2 of the number of cuts) for each provided feature.
bool m_sPlot
Activates sPlot sampling.
double m_randRatio
Fraction of data to use in the stochastic training.
double m_flatnessLoss
Flatness Loss constant.
double m_shrinkage
Shrinkage during the boosting step.
bool m_purityTransformation
Activates purity transformation globally for all features.
unsigned int m_nLevels
Depth of tree.
std::vector< bool > m_individualPurityTransformation
Vector which decides for each feature individually whether the purity transformation should be used.
unsigned int m_nCuts
Number of cut levels (log_2 of the number of cuts).
unsigned int m_nTrees
Number of trees.
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
GeneralOptions m_general_options
GeneralOptions containing all shared options.