Belle II Software  release-05-01-25
PDF.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2016 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Thomas Keck *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
#include <mva/methods/PDF.h>
#include <framework/logging/Logger.h>

#include <limits>
#include <utility>
13 
14 namespace Belle2 {
19  namespace MVA {
20 
21  void PDFOptions::load(const boost::property_tree::ptree& pt)
22  {
23  int version = pt.get<int>("PDF_version");
24  if (version != 1) {
25  B2ERROR("Unkown weightfile version " << std::to_string(version));
26  throw std::runtime_error("Unkown weightfile version " + std::to_string(version));
27  }
28  m_binning = pt.get<std::string>("PDF_binning");
29  m_mode = pt.get<std::string>("PDF_mode");
30  m_nBins = pt.get<unsigned int>("PDF_nBins");
31 
32  }
33 
34  void PDFOptions::save(boost::property_tree::ptree& pt) const
35  {
36  pt.put("PDF_version", 1);
37  pt.put("PDF_binning", m_binning);
38  pt.put("PDF_mode", m_mode);
39  pt.put("PDF_nBins", m_nBins);
40  }
41 
42  po::options_description PDFOptions::getDescription()
43  {
44  po::options_description description("PDF options");
45  description.add_options()
46  ("binning", po::value<std::string>(&m_binning), "Binning used to bin the data (frequency|equidistant)")
47  ("nBins", po::value<unsigned int>(&m_nBins), "Number of bins used to bin the data")
48  ("mode", po::value<std::string>(&m_mode), "Mode used to calculate return value of expert");
49  return description;
50  }
51 
52 
53  PDFTeacher::PDFTeacher(const GeneralOptions& general_options, const PDFOptions& specific_options) : Teacher(general_options),
54  m_specific_options(specific_options) { }
55 
56  Weightfile PDFTeacher::train(Dataset& training_data) const
57  {
58 
59  unsigned int numberOfFeatures = training_data.getNumberOfFeatures();
60  if (numberOfFeatures != 1) {
61  B2ERROR("PDF method only supports exactly one feature!");
62  throw std::runtime_error("PDF method only supports exactly one feature!");
63  }
64  unsigned int numberOfEvents = training_data.getNumberOfEvents();
65 
66  std::vector<float> data(numberOfEvents);
67  std::vector<float> weights(numberOfEvents);
68  std::vector<bool> isSignal(numberOfEvents);
69  for (unsigned int iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
70  training_data.loadEvent(iEvent);
71  data[iEvent] = training_data.m_input[0];
72  weights[iEvent] = training_data.m_weight;
73  isSignal[iEvent] = training_data.m_isSignal;
74  }
75 
76  unsigned int nBins = m_specific_options.m_nBins;
77 
78  Binning binning;
79  if (m_specific_options.m_binning == "frequency")
80  binning = Binning::CreateEqualFrequency(data, weights, isSignal, nBins);
81  else if (m_specific_options.m_binning == "equidistant")
82  binning = Binning::CreateEquidistant(data, weights, isSignal, nBins);
83  else {
84  B2ERROR("Unkown binning in PDF Teacher named " << m_specific_options.m_binning);
85  throw std::runtime_error("Unkown binning in PDF Teacher named " + m_specific_options.m_binning);
86  }
87 
88  Weightfile weightfile;
89  std::string custom_weightfile = weightfile.generateFileName();
90  std::fstream file(custom_weightfile, std::ios_base::out | std::ios_base::trunc);
91 
92  std::vector<double> value(nBins, 0);
93  if (m_specific_options.m_mode == "probability") {
94  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
95  if (binning.m_signal_pdf[iBin] > 0)
96  value[iBin] = binning.m_signal_pdf[iBin] / (binning.m_signal_pdf[iBin] + binning.m_bckgrd_pdf[iBin]);
97  }
98  } else {
99  B2ERROR("Unkown mode in PDF Teacher named " << m_specific_options.m_mode);
100  throw std::runtime_error("Unkown mode in PDF Teacher named " + m_specific_options.m_mode);
101  }
102 
103  file << nBins << std::endl;
104  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
105  file << value[iBin] << " " << binning.m_boundaries[iBin] << std::endl;
106  }
107  file << binning.m_boundaries[nBins];
108 
109  file.close();
110 
111  weightfile.addOptions(m_general_options);
112  weightfile.addOptions(m_specific_options);
113  weightfile.addFile("PDF_Weightfile", custom_weightfile);
114  weightfile.addSignalFraction(training_data.getSignalFraction());
115 
116  return weightfile;
117 
118  }
119 
120  void PDFExpert::load(Weightfile& weightfile)
121  {
122 
123  std::string custom_weightfile = weightfile.generateFileName();
124  weightfile.getFile("PDF_Weightfile", custom_weightfile);
125  std::fstream file(custom_weightfile, std::ios_base::in);
126 
127  unsigned int nBins = 0;
128  file >> nBins;
129  m_value.resize(nBins, 0);
130 
131  std::vector<float> boundaries(nBins + 1, 0);
132 
133  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
134  file >> m_value[iBin] >> boundaries[iBin];
135  }
136  file >> boundaries[nBins];
137 
138  m_binning.m_boundaries = boundaries;
139 
140  file.close();
141 
142  weightfile.getOptions(m_specific_options);
143  }
144 
145  std::vector<float> PDFExpert::apply(Dataset& test_data) const
146  {
147  std::vector<float> probabilities(test_data.getNumberOfEvents(), 0);
148  for (unsigned int iEvent = 0; iEvent < test_data.getNumberOfEvents(); ++iEvent) {
149  test_data.loadEvent(iEvent);
150  unsigned int bin = m_binning.getBin(test_data.m_input[0]);
151  probabilities[iEvent] = m_value[bin];
152  }
153  return probabilities;
154  }
155 
156  }
158 }
Belle2::MVA::Binning::m_boundaries
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:63
Belle2::MVA::Binning::getBin
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Definition: Binning.cc:44
Belle2::MVA::PDFExpert::m_specific_options
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:104
Belle2::MVA::PDFOptions::m_nBins
unsigned int m_nBins
number of bins used to bin the data
Definition: PDF.h:58
Belle2::MVA::Binning::CreateEqualFrequency
static Binning CreateEqualFrequency(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equal frequency (aka equal-statistics) binning.
Definition: Binning.cc:103
Belle2::MVA::Binning::m_bckgrd_pdf
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:60
Belle2::MVA::PDFTeacher::m_specific_options
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:82
Belle2::MVA::Binning::CreateEquidistant
static Binning CreateEquidistant(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equidistant binning.
Definition: Binning.cc:149
Belle2::MVA::PDFOptions::getDescription
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: PDF.cc:50
Belle2::MVA::PDFTeacher::PDFTeacher
PDFTeacher(const GeneralOptions &general_options, const PDFOptions &specific_options)
Constructs a new teacher using the GeneralOptions and PDFoptions for this training.
Definition: PDF.cc:61
Belle2::MVA::PDFOptions::m_mode
std::string m_mode
mode which defines the final output e.g.
Definition: PDF.h:57
Belle2::MVA::Teacher::m_general_options
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition: Teacher.h:51
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::Teacher
Abstract base class of all Teachers. Each MVA library has its own implementation of this class,...
Definition: Teacher.h:31
Belle2::MVA::PDFOptions::m_binning
std::string m_binning
which type of binning is performed e.g.
Definition: PDF.h:56
Belle2::MVA::PDFOptions
Options for the PDF MVA method.
Definition: PDF.h:31
Belle2::MVA::PDFExpert::load
virtual void load(Weightfile &weightfile) override
Load the PDF expert from a Weightfile.
Definition: PDF.cc:128
Belle2::MVA::GeneralOptions
General options which are shared by all MVA trainings.
Definition: Options.h:64
Belle2::MVA::PDFTeacher::train
virtual Weightfile train(Dataset &training_data) const override
Train PDF method using the given dataset returning a Weightfile.
Definition: PDF.cc:64
Belle2::MVA::PDFExpert::apply
virtual std::vector< float > apply(Dataset &test_data) const override
Apply PDF expert onto a dataset.
Definition: PDF.cc:153
Belle2::MVA::PDFOptions::load
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: PDF.cc:29
Belle2::MVA::PDFExpert::m_binning
Binning m_binning
used binning
Definition: PDF.h:105
Belle2::MVA::Binning
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:29
Belle2::MVA::PDFOptions::save
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: PDF.cc:42
Belle2::MVA::Binning::m_signal_pdf
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:58
Belle2::MVA::PDFExpert::m_value
std::vector< double > m_value
value returned by expert for each bin
Definition: PDF.h:106