Belle II Software development
PDF.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <mva/methods/PDF.h>
10#include <framework/logging/Logger.h>
11
12namespace Belle2 {
17 namespace MVA {
18
19 void PDFOptions::load(const boost::property_tree::ptree& pt)
20 {
21 int version = pt.get<int>("PDF_version");
22 if (version != 1) {
23 B2ERROR("Unknown weightfile version " << std::to_string(version));
24 throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
25 }
26 m_binning = pt.get<std::string>("PDF_binning");
27 m_mode = pt.get<std::string>("PDF_mode");
28 m_nBins = pt.get<unsigned int>("PDF_nBins");
29
30 }
31
32 void PDFOptions::save(boost::property_tree::ptree& pt) const
33 {
34 pt.put("PDF_version", 1);
35 pt.put("PDF_binning", m_binning);
36 pt.put("PDF_mode", m_mode);
37 pt.put("PDF_nBins", m_nBins);
38 }
39
40 po::options_description PDFOptions::getDescription()
41 {
42 po::options_description description("PDF options");
43 description.add_options()
44 ("binning", po::value<std::string>(&m_binning), "Binning used to bin the data (frequency|equidistant)")
45 ("nBins", po::value<unsigned int>(&m_nBins), "Number of bins used to bin the data")
46 ("mode", po::value<std::string>(&m_mode), "Mode used to calculate return value of expert");
47 return description;
48 }
49
50
51 PDFTeacher::PDFTeacher(const GeneralOptions& general_options, const PDFOptions& specific_options) : Teacher(general_options),
52 m_specific_options(specific_options) { }
53
54 Weightfile PDFTeacher::train(Dataset& training_data) const
55 {
56
57 unsigned int numberOfFeatures = training_data.getNumberOfFeatures();
58 if (numberOfFeatures != 1) {
59 B2ERROR("PDF method only supports exactly one feature!");
60 throw std::runtime_error("PDF method only supports exactly one feature!");
61 }
62 unsigned int numberOfEvents = training_data.getNumberOfEvents();
63
64 std::vector<float> data(numberOfEvents);
65 std::vector<float> weights(numberOfEvents);
66 std::vector<bool> isSignal(numberOfEvents);
67 for (unsigned int iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
68 training_data.loadEvent(iEvent);
69 data[iEvent] = training_data.m_input[0];
70 weights[iEvent] = training_data.m_weight;
71 isSignal[iEvent] = training_data.m_isSignal;
72 }
73
74 unsigned int nBins = m_specific_options.m_nBins;
75
76 Binning binning;
77 if (m_specific_options.m_binning == "frequency")
78 binning = Binning::CreateEqualFrequency(data, weights, isSignal, nBins);
79 else if (m_specific_options.m_binning == "equidistant")
80 binning = Binning::CreateEquidistant(data, weights, isSignal, nBins);
81 else {
82 B2ERROR("Unknown binning in PDF Teacher named " << m_specific_options.m_binning);
83 throw std::runtime_error("Unknown binning in PDF Teacher named " + m_specific_options.m_binning);
84 }
85
86 Weightfile weightfile;
87 std::string custom_weightfile = weightfile.generateFileName();
88 std::fstream file(custom_weightfile, std::ios_base::out | std::ios_base::trunc);
89
90 std::vector<double> value(nBins, 0);
91 if (m_specific_options.m_mode == "probability") {
92 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
93 if (binning.m_signal_pdf[iBin] > 0)
94 value[iBin] = binning.m_signal_pdf[iBin] / (binning.m_signal_pdf[iBin] + binning.m_bckgrd_pdf[iBin]);
95 }
96 } else {
97 B2ERROR("Unknown mode in PDF Teacher named " << m_specific_options.m_mode);
98 throw std::runtime_error("Unknown mode in PDF Teacher named " + m_specific_options.m_mode);
99 }
100
101 file << nBins << std::endl;
102 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
103 file << value[iBin] << " " << binning.m_boundaries[iBin] << std::endl;
104 }
105 file << binning.m_boundaries[nBins];
106
107 file.close();
108
109 weightfile.addOptions(m_general_options);
110 weightfile.addOptions(m_specific_options);
111 weightfile.addFile("PDF_Weightfile", custom_weightfile);
112 weightfile.addSignalFraction(training_data.getSignalFraction());
113
114 return weightfile;
115
116 }
117
118 void PDFExpert::load(Weightfile& weightfile)
119 {
120
121 std::string custom_weightfile = weightfile.generateFileName();
122 weightfile.getFile("PDF_Weightfile", custom_weightfile);
123 std::fstream file(custom_weightfile, std::ios_base::in);
124
125 unsigned int nBins = 0;
126 file >> nBins;
127 m_value.resize(nBins, 0);
128
129 std::vector<float> boundaries(nBins + 1, 0);
130
131 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
132 file >> m_value[iBin] >> boundaries[iBin];
133 }
134 file >> boundaries[nBins];
135
136 m_binning.m_boundaries = boundaries;
137
138 file.close();
139
140 weightfile.getOptions(m_specific_options);
141 }
142
143 std::vector<float> PDFExpert::apply(Dataset& test_data) const
144 {
145 std::vector<float> probabilities(test_data.getNumberOfEvents(), 0);
146 for (unsigned int iEvent = 0; iEvent < test_data.getNumberOfEvents(); ++iEvent) {
147 test_data.loadEvent(iEvent);
148 unsigned int bin = m_binning.getBin(test_data.m_input[0]);
149 probabilities[iEvent] = m_value[bin];
150 }
151 return probabilities;
152 }
153
154 }
156}
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:27
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:58
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:56
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:61
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Definition: Binning.cc:34
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:33
General options which are shared by all MVA trainings.
Definition: Options.h:62
Binning m_binning
used binning
Definition: PDF.h:103
virtual std::vector< float > apply(Dataset &test_data) const override
Apply PDF expert onto a dataset.
Definition: PDF.cc:143
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:102
std::vector< double > m_value
value returned by expert for each bin
Definition: PDF.h:104
virtual void load(Weightfile &weightfile) override
Load the PDF expert from a Weightfile.
Definition: PDF.cc:118
Options for the PDF MVA method.
Definition: PDF.h:29
unsigned int m_nBins
number of bins used to bin the data
Definition: PDF.h:56
std::string m_binning
which type of binning is performed, e.g. frequency or equidistant
Definition: PDF.h:54
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: PDF.cc:40
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism (used by Weightfile) to load Options from a xml tree.
Definition: PDF.cc:19
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism (used by Weightfile) to store Options in a xml tree.
Definition: PDF.cc:32
std::string m_mode
mode which defines the final output, e.g. probability
Definition: PDF.h:55
PDFOptions m_specific_options
Specific options of the PDF method.
Definition: PDF.h:80
PDFTeacher(const GeneralOptions &general_options, const PDFOptions &specific_options)
Constructs a new teacher using the GeneralOptions and PDFOptions for this training.
Definition: PDF.cc:51
virtual Weightfile train(Dataset &training_data) const override
Train PDF method using the given dataset returning a Weightfile.
Definition: PDF.cc:54
Abstract base class of all Teachers Each MVA library has its own implementation of this class,...
Definition: Teacher.h:29
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition: Teacher.h:49
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:115
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:62
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:67
void addSignalFraction(float signal_fraction)
Saves the signal fraction in the xml tree.
Definition: Weightfile.cc:95
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:105
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:138
Abstract base class for different kinds of events.