Belle II Software  release-08-01-10
PDF.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <mva/methods/PDF.h>
10 #include <framework/logging/Logger.h>
11 
12 namespace Belle2 {
17  namespace MVA {
18 
19  void PDFOptions::load(const boost::property_tree::ptree& pt)
20  {
21  int version = pt.get<int>("PDF_version");
22  if (version != 1) {
23  B2ERROR("Unknown weightfile version " << std::to_string(version));
24  throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
25  }
26  m_binning = pt.get<std::string>("PDF_binning");
27  m_mode = pt.get<std::string>("PDF_mode");
28  m_nBins = pt.get<unsigned int>("PDF_nBins");
29 
30  }
31 
32  void PDFOptions::save(boost::property_tree::ptree& pt) const
33  {
34  pt.put("PDF_version", 1);
35  pt.put("PDF_binning", m_binning);
36  pt.put("PDF_mode", m_mode);
37  pt.put("PDF_nBins", m_nBins);
38  }
39 
40  po::options_description PDFOptions::getDescription()
41  {
42  po::options_description description("PDF options");
43  description.add_options()
44  ("binning", po::value<std::string>(&m_binning), "Binning used to bin the data (frequency|equidistant)")
45  ("nBins", po::value<unsigned int>(&m_nBins), "Number of bins used to bin the data")
46  ("mode", po::value<std::string>(&m_mode), "Mode used to calculate return value of expert");
47  return description;
48  }
49 
50 
51  PDFTeacher::PDFTeacher(const GeneralOptions& general_options, const PDFOptions& specific_options) : Teacher(general_options),
52  m_specific_options(specific_options) { }
53 
54  Weightfile PDFTeacher::train(Dataset& training_data) const
55  {
56 
57  unsigned int numberOfFeatures = training_data.getNumberOfFeatures();
58  if (numberOfFeatures != 1) {
59  B2ERROR("PDF method only supports exactly one feature!");
60  throw std::runtime_error("PDF method only supports exactly one feature!");
61  }
62  unsigned int numberOfEvents = training_data.getNumberOfEvents();
63 
64  std::vector<float> data(numberOfEvents);
65  std::vector<float> weights(numberOfEvents);
66  std::vector<bool> isSignal(numberOfEvents);
67  for (unsigned int iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
68  training_data.loadEvent(iEvent);
69  data[iEvent] = training_data.m_input[0];
70  weights[iEvent] = training_data.m_weight;
71  isSignal[iEvent] = training_data.m_isSignal;
72  }
73 
74  unsigned int nBins = m_specific_options.m_nBins;
75 
76  Binning binning;
77  if (m_specific_options.m_binning == "frequency")
78  binning = Binning::CreateEqualFrequency(data, weights, isSignal, nBins);
79  else if (m_specific_options.m_binning == "equidistant")
80  binning = Binning::CreateEquidistant(data, weights, isSignal, nBins);
81  else {
82  B2ERROR("Unknown binning in PDF Teacher named " << m_specific_options.m_binning);
83  throw std::runtime_error("Unknown binning in PDF Teacher named " + m_specific_options.m_binning);
84  }
85 
86  Weightfile weightfile;
87  std::string custom_weightfile = weightfile.generateFileName();
88  std::fstream file(custom_weightfile, std::ios_base::out | std::ios_base::trunc);
89 
90  std::vector<double> value(nBins, 0);
91  if (m_specific_options.m_mode == "probability") {
92  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
93  if (binning.m_signal_pdf[iBin] > 0)
94  value[iBin] = binning.m_signal_pdf[iBin] / (binning.m_signal_pdf[iBin] + binning.m_bckgrd_pdf[iBin]);
95  }
96  } else {
97  B2ERROR("Unknown mode in PDF Teacher named " << m_specific_options.m_mode);
98  throw std::runtime_error("Unknown mode in PDF Teacher named " + m_specific_options.m_mode);
99  }
100 
101  file << nBins << std::endl;
102  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
103  file << value[iBin] << " " << binning.m_boundaries[iBin] << std::endl;
104  }
105  file << binning.m_boundaries[nBins];
106 
107  file.close();
108 
109  weightfile.addOptions(m_general_options);
110  weightfile.addOptions(m_specific_options);
111  weightfile.addFile("PDF_Weightfile", custom_weightfile);
112  weightfile.addSignalFraction(training_data.getSignalFraction());
113 
114  return weightfile;
115 
116  }
117 
118  void PDFExpert::load(Weightfile& weightfile)
119  {
120 
121  std::string custom_weightfile = weightfile.generateFileName();
122  weightfile.getFile("PDF_Weightfile", custom_weightfile);
123  std::fstream file(custom_weightfile, std::ios_base::in);
124 
125  unsigned int nBins = 0;
126  file >> nBins;
127  m_value.resize(nBins, 0);
128 
129  std::vector<float> boundaries(nBins + 1, 0);
130 
131  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
132  file >> m_value[iBin] >> boundaries[iBin];
133  }
134  file >> boundaries[nBins];
135 
136  m_binning.m_boundaries = boundaries;
137 
138  file.close();
139 
140  weightfile.getOptions(m_specific_options);
141  }
142 
143  std::vector<float> PDFExpert::apply(Dataset& test_data) const
144  {
145  std::vector<float> probabilities(test_data.getNumberOfEvents(), 0);
146  for (unsigned int iEvent = 0; iEvent < test_data.getNumberOfEvents(); ++iEvent) {
147  test_data.loadEvent(iEvent);
148  unsigned int bin = m_binning.getBin(test_data.m_input[0]);
149  probabilities[iEvent] = m_value[bin];
150  }
151  return probabilities;
152  }
153 
154  }
156 }
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:27
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:58
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:56
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:61
static Binning CreateEquidistant(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equidistant binning.
Definition: Binning.cc:139
static Binning CreateEqualFrequency(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equal frequency (aka equal-statistics) binning.
Definition: Binning.cc:93
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Definition: Binning.cc:34
Abstract base class of all Datasets given to the MVA interface. The current event can always be access...
Definition: Dataset.h:33
General options which are shared by all MVA trainings.
Definition: Options.h:62
Binning m_binning
used binning
Definition: PDF.h:103
virtual std::vector< float > apply(Dataset &test_data) const override
Apply PDF expert onto a dataset.
Definition: PDF.cc:143
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:102
std::vector< double > m_value
value returned by expert for each bin
Definition: PDF.h:104
virtual void load(Weightfile &weightfile) override
Load the PDF expert from a Weightfile.
Definition: PDF.cc:118
Options for the PDF MVA method.
Definition: PDF.h:29
unsigned int m_nBins
number of bins used to bin the data
Definition: PDF.h:56
std::string m_binning
which type of binning is performed e.g.
Definition: PDF.h:54
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: PDF.cc:40
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: PDF.cc:19
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: PDF.cc:32
std::string m_mode
mode which defines the final output e.g.
Definition: PDF.h:55
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:80
PDFTeacher(const GeneralOptions &general_options, const PDFOptions &specific_options)
Constructs a new teacher using the GeneralOptions and PDFOptions for this training.
Definition: PDF.cc:51
virtual Weightfile train(Dataset &training_data) const override
Train PDF method using the given dataset returning a Weightfile.
Definition: PDF.cc:54
Abstract base class of all Teachers. Each MVA library has its own implementation of this class,...
Definition: Teacher.h:29
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition: Teacher.h:49
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:115
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:62
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:67
void addSignalFraction(float signal_fraction)
Saves the signal fraction in the xml tree.
Definition: Weightfile.cc:95
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:105
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:138
Abstract base class for different kinds of events.