Convenience function that performs an sPlot training.
316{
317
320 if (meta_options.m_splot_combined)
321 data_general_options.
m_identifier = general_options.m_identifier +
"_splot.xml";
323
325
328 discriminant_general_options.
m_variables = {meta_options.m_splot_variable};
329 ROOTDataset discriminant_dataset(discriminant_general_options);
330
331 discriminant_general_options.
m_target_variable = general_options.m_target_variable;
332
334 mc_general_options.
m_datafiles = meta_options.m_splot_mc_files;
335 mc_general_options.
m_variables = {meta_options.m_splot_variable};
337
338 auto mc_signals = mc_dataset.getSignals();
339 auto mc_weights = mc_dataset.getWeights();
340 auto mc_feature = mc_dataset.getFeature(0);
341 auto data_feature = discriminant_dataset.getFeature(0);
342 auto data_weights = discriminant_dataset.getWeights();
343
344 Binning binning = Binning::CreateEqualFrequency(mc_feature, mc_weights, mc_signals, 100);
345
347
348 std::vector<double> data(100, 0);
349 double total_data = 0.0;
350 for (unsigned int iEvent = 0; iEvent < data_dataset.getNumberOfEvents(); ++iEvent) {
351 data[binning.
getBin(data_feature[iEvent])] += data_weights[iEvent];
352 total_data += data_weights[iEvent];
353 }
354
355
356
357
358
359 float best_yield = 0.0;
360 double best_chi2 = 1000000000.0;
361 bool empty_bin = false;
362 for (double yield = 0; yield < total_data; yield += 1) {
363 double chi2 = 0.0;
364 for (unsigned int iBin = 0; iBin < 100; ++iBin) {
365 double deviation = (data[iBin] - (yield * binning.
m_signal_pdf[iBin] + (total_data - yield) * binning.
m_bckgrd_pdf[iBin]) *
367 if (data[iBin] > 0)
368 chi2 += deviation * deviation / data[iBin];
369 else
370 empty_bin = true;
371 }
372 if (chi2 < best_chi2) {
373 best_chi2 = chi2;
374 best_yield = yield;
375 }
376 }
377
378 if (empty_bin) {
379 B2WARNING("Encountered empty bin in data histogram during fit of the components for sPlot");
380 }
381
382 B2INFO("sPlot best yield " << best_yield);
384
387
389
390 if (meta_options.m_splot_boosted) {
392 boost_general_options.
m_identifier = general_options.m_identifier +
"_boost.xml";
393 SPlotDataset splot_dataset(boost_general_options, data_dataset, getBoostWeights(discriminant_dataset, binning), signalFraction);
394 auto boost_expert =
teacher_dataset(boost_general_options, specific_options, splot_dataset);
395
396 SPlotDataset aplot_dataset(data_general_options, data_dataset, getAPlotWeights(discriminant_dataset, binning,
397 boost_expert->apply(data_dataset)), signalFraction);
398 auto splot_expert =
teacher_dataset(data_general_options, specific_options, aplot_dataset);
399 if (not meta_options.m_splot_combined)
400 return splot_expert;
401 } else {
402 SPlotDataset splot_dataset(data_general_options, data_dataset, getSPlotWeights(discriminant_dataset, binning), signalFraction);
403 auto splot_expert =
teacher_dataset(data_general_options, specific_options, splot_dataset);
404 if (not meta_options.m_splot_combined)
405 return splot_expert;
406 }
407
408 mc_general_options.
m_identifier = general_options.m_identifier +
"_pdf.xml";
409 mc_general_options.
m_method =
"PDF";
411
412 auto pdf_expert =
teacher_dataset(mc_general_options, pdf_options, mc_dataset);
413
415 combination_general_options.
m_method =
"Combination";
416 combination_general_options.
m_variables.push_back(meta_options.m_splot_variable);
419 auto combination_expert =
teacher_dataset(combination_general_options, combination_options, data_dataset);
420
421 return combination_expert;
422}
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
double m_bckgrd_yield
Background yield in data distribution.
double m_signal_yield
Signal yield in data distribution.
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Options for the Combination MVA method.
std::vector< std::string > m_weightfiles
Weightfiles of all methods we want to combine.
Options for the PDF MVA method.
Dataset for sPlot. Wraps a dataset and provides each data-point twice, once as signal and once as background...