Convenience function that performs an sPlot training.
315{
316
317 GeneralOptions data_general_options = general_options;
319 if (meta_options.m_splot_combined)
320 data_general_options.
m_identifier = general_options.m_identifier +
"_splot.xml";
321 ROOTDataset data_dataset(data_general_options);
322
324
325 GeneralOptions discriminant_general_options = general_options;
327 discriminant_general_options.
m_variables = {meta_options.m_splot_variable};
328 ROOTDataset discriminant_dataset(discriminant_general_options);
329
330 discriminant_general_options.
m_target_variable = general_options.m_target_variable;
331
332 GeneralOptions mc_general_options = general_options;
333 mc_general_options.
m_datafiles = meta_options.m_splot_mc_files;
334 mc_general_options.
m_variables = {meta_options.m_splot_variable};
335 ROOTDataset mc_dataset(mc_general_options);
336
337 auto mc_signals = mc_dataset.getSignals();
338 auto mc_weights = mc_dataset.getWeights();
339 auto mc_feature = mc_dataset.getFeature(0);
340 auto data_feature = discriminant_dataset.getFeature(0);
341 auto data_weights = discriminant_dataset.getWeights();
342
343 Binning binning = Binning::CreateEqualFrequency(mc_feature, mc_weights, mc_signals, 100);
344
346
347 std::vector<double> data(100, 0);
348 double total_data = 0.0;
349 for (unsigned int iEvent = 0; iEvent < data_dataset.getNumberOfEvents(); ++iEvent) {
350 data[binning.
getBin(data_feature[iEvent])] += data_weights[iEvent];
351 total_data += data_weights[iEvent];
352 }
353
354
355
356
357
358 float best_yield = 0.0;
359 double best_chi2 = 1000000000.0;
360 bool empty_bin = false;
361 for (double yield = 0; yield < total_data; yield += 1) {
362 double chi2 = 0.0;
363 for (unsigned int iBin = 0; iBin < 100; ++iBin) {
364 double deviation = (data[iBin] - (yield * binning.
m_signal_pdf[iBin] + (total_data - yield) * binning.
m_bckgrd_pdf[iBin]) *
366 if (data[iBin] > 0)
367 chi2 += deviation * deviation / data[iBin];
368 else
369 empty_bin = true;
370 }
371 if (chi2 < best_chi2) {
372 best_chi2 = chi2;
373 best_yield = yield;
374 }
375 }
376
377 if (empty_bin) {
378 B2WARNING("Encountered empty bin in data histogram during fit of the components for sPlot");
379 }
380
381 B2INFO("sPlot best yield " << best_yield);
383
386
388
389 if (meta_options.m_splot_boosted) {
390 GeneralOptions boost_general_options = data_general_options;
391 boost_general_options.
m_identifier = general_options.m_identifier +
"_boost.xml";
392 SPlotDataset splot_dataset(boost_general_options, data_dataset, getBoostWeights(discriminant_dataset, binning), signalFraction);
393 auto boost_expert =
teacher_dataset(boost_general_options, specific_options, splot_dataset);
394
395 SPlotDataset aplot_dataset(data_general_options, data_dataset, getAPlotWeights(discriminant_dataset, binning,
396 boost_expert->apply(data_dataset)), signalFraction);
397 auto splot_expert =
teacher_dataset(data_general_options, specific_options, aplot_dataset);
398 if (not meta_options.m_splot_combined)
399 return splot_expert;
400 } else {
401 SPlotDataset splot_dataset(data_general_options, data_dataset, getSPlotWeights(discriminant_dataset, binning), signalFraction);
402 auto splot_expert =
teacher_dataset(data_general_options, specific_options, splot_dataset);
403 if (not meta_options.m_splot_combined)
404 return splot_expert;
405 }
406
407 mc_general_options.
m_identifier = general_options.m_identifier +
"_pdf.xml";
408 mc_general_options.
m_method =
"PDF";
409 PDFOptions pdf_options;
410
411 auto pdf_expert =
teacher_dataset(mc_general_options, pdf_options, mc_dataset);
412
413 GeneralOptions combination_general_options = general_options;
414 combination_general_options.
m_method =
"Combination";
415 combination_general_options.
m_variables.push_back(meta_options.m_splot_variable);
416 CombinationOptions combination_options;
418 auto combination_expert =
teacher_dataset(combination_general_options, combination_options, data_dataset);
419
420 return combination_expert;
421}
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
double m_bckgrd_yield
Background yield in data distribution.
double m_signal_yield
Signal yield in data distribution.
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
std::vector< std::string > m_weightfiles
Weightfiles of all methods we want to combine.
std::vector< std::pair< double, double > > Binning
Bin holder as vector for bin limit pairs: [energy limits, theta limits, phi limits].