Belle II Software  release-05-01-25
test_DataDriven.cc
1 /* BASF2 (Belle Analysis Framework 2) *
2  * Copyright(C) 2016 - Belle II Collaboration *
3  * *
4  * Author: The Belle II Collaboration *
5  * Contributors: Thomas Keck *
6  * *
7  * This software is provided "as is" without any warranty. *
8  **************************************************************************/
9 
10 #include <mva/utility/DataDriven.h>
11 #include <mva/interface/Interface.h>
12 #include <framework/utilities/FileSystem.h>
13 #include <framework/utilities/TestHelpers.h>
14 
15 #include <gtest/gtest.h>
16 #include <numeric>
17 
18 using namespace Belle2;
19 
20 namespace {
21 
22  class TestDataset : public MVA::Dataset {
23  public:
24  explicit TestDataset(MVA::GeneralOptions& general_options) : MVA::Dataset(general_options)
25  {
26  m_input = {0.0, 0.0};
27  m_target = 0.0;
28  m_isSignal = false;
29  m_weight = 1.0;
30  // Suppress cppcheck remark
31  // performance: Variable 'm_a' is assigned in constructor body. Consider performing initialization in initialization list.
32  // Initializing this vector in the initialization list is not readable, and performance is negligible here
33  // cppcheck-suppress *
34  m_a = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 3.0, 4.0, 4.0, 4.0, 4.0};
35  }
36 
37  [[nodiscard]] unsigned int getNumberOfFeatures() const override { return 1; }
38  [[nodiscard]] unsigned int getNumberOfSpectators() const override { return 0; }
39  [[nodiscard]] unsigned int getNumberOfEvents() const override { return 20; }
40  void loadEvent(unsigned int iEvent) override { m_input[0] = m_a[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
41  float getSignalFraction() override { return 0.5; };
42  std::vector<float> getFeature(unsigned int) override { return m_a; }
43 
44  std::vector<float> m_a;
45 
46  };
47 
48  TEST(SPlotTest, SPlotDataset)
49  {
50 
51  MVA::GeneralOptions general_options;
52  general_options.m_variables = {"A"};
53  TestDataset dataset(general_options);
54 
55  std::vector<float> weights(40);
56  std::iota(weights.begin(), weights.end(), 0.0);
57  MVA::SPlotDataset splot_dataset(general_options, dataset, weights, 0.5);
58 
59  EXPECT_EQ(splot_dataset.getNumberOfFeatures(), 1);
60  EXPECT_EQ(splot_dataset.getNumberOfEvents(), 40);
61  EXPECT_EQ(splot_dataset.getSignalFraction(), 0.5);
62 
63  auto feature = dataset.getFeature(0);
64  for (unsigned int i = 0; i < 40; ++i) {
65  splot_dataset.loadEvent(i);
66  EXPECT_FLOAT_EQ(splot_dataset.m_input[0], feature[i / 2]);
67  EXPECT_FLOAT_EQ(splot_dataset.m_weight, 1.0 * i);
68  EXPECT_EQ(splot_dataset.m_isSignal, (i % 2) == 0);
69  }
70 
71  }
72 
73  TEST(ReweightingTest, ReweightingDataset)
74  {
75 
76  MVA::GeneralOptions general_options;
77  general_options.m_variables = {"A"};
78  TestDataset dataset(general_options);
79 
80  std::vector<float> weights(20);
81  std::iota(weights.begin(), weights.end(), 0.0);
82  MVA::ReweightingDataset reweighting_dataset(general_options, dataset, weights);
83 
84  EXPECT_EQ(reweighting_dataset.getNumberOfFeatures(), 1);
85  EXPECT_EQ(reweighting_dataset.getNumberOfEvents(), 20);
86 
87  auto feature = dataset.getFeature(0);
88  for (unsigned int i = 0; i < 20; ++i) {
89  reweighting_dataset.loadEvent(i);
90  EXPECT_FLOAT_EQ(reweighting_dataset.m_input[0], feature[i]);
91  EXPECT_FLOAT_EQ(reweighting_dataset.m_weight, 1.0 * i);
92  EXPECT_EQ(reweighting_dataset.m_isSignal, (i % 2) == 1);
93  }
94 
95  }
96 
97  TEST(SPlotTest, GetSPlotWeights)
98  {
99 
100  MVA::GeneralOptions general_options;
101  general_options.m_variables = {"A", "D"};
102  TestDataset dataset(general_options);
103 
104  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
105 
106  EXPECT_EQ(binning.m_boundaries.size(), 5);
107  EXPECT_FLOAT_EQ(binning.m_boundaries[0], 1.0);
108  EXPECT_FLOAT_EQ(binning.m_boundaries[1], 1.75);
109  EXPECT_FLOAT_EQ(binning.m_boundaries[2], 2.5);
110  EXPECT_FLOAT_EQ(binning.m_boundaries[3], 3.25);
111  EXPECT_FLOAT_EQ(binning.m_boundaries[4], 4.0);
112 
113  EXPECT_EQ(binning.m_signal_pdf.size(), 4);
114  EXPECT_FLOAT_EQ(binning.m_signal_pdf[0], 0.2 * 4.0);
115  EXPECT_FLOAT_EQ(binning.m_signal_pdf[1], 0.3 * 4.0);
116  EXPECT_FLOAT_EQ(binning.m_signal_pdf[2], 0.3 * 4.0);
117  EXPECT_FLOAT_EQ(binning.m_signal_pdf[3], 0.2 * 4.0);
118 
119  EXPECT_EQ(binning.m_bckgrd_pdf.size(), 4);
120  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[0], 0.3 * 4.0);
121  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[1], 0.2 * 4.0);
122  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[2], 0.2 * 4.0);
123  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[3], 0.3 * 4.0);
124 
125  EXPECT_FLOAT_EQ(binning.m_signal_yield, 10);
126  EXPECT_FLOAT_EQ(binning.m_bckgrd_yield, 10);
127 
128  auto splot_weights = MVA::getSPlotWeights(dataset, binning);
129 
130  double sum = 0;
131  for (auto& s : splot_weights)
132  sum += s;
133  EXPECT_FLOAT_EQ(sum, 20.0);
134 
135  EXPECT_EQ(splot_weights.size(), 40);
136  for (unsigned int i = 0; i < 10; i += 2) {
137  EXPECT_FLOAT_EQ(splot_weights[i], -2.0);
138  EXPECT_FLOAT_EQ(splot_weights[i + 1], 3.0);
139  }
140  for (unsigned int i = 10; i < 20; i += 2) {
141  EXPECT_FLOAT_EQ(splot_weights[i], 3.0);
142  EXPECT_FLOAT_EQ(splot_weights[i + 1], -2.0);
143  }
144  for (unsigned int i = 20; i < 28; i += 2) {
145  EXPECT_FLOAT_EQ(splot_weights[i], 3.0);
146  EXPECT_FLOAT_EQ(splot_weights[i + 1], -2.0);
147  }
148  EXPECT_FLOAT_EQ(splot_weights[28], -2.0);
149  EXPECT_FLOAT_EQ(splot_weights[29], 3.0);
150  EXPECT_FLOAT_EQ(splot_weights[30], 3.0);
151  EXPECT_FLOAT_EQ(splot_weights[31], -2.0);
152  for (unsigned int i = 32; i < 40; i += 2) {
153  EXPECT_FLOAT_EQ(splot_weights[i], -2.0);
154  EXPECT_FLOAT_EQ(splot_weights[i + 1], 3.0);
155  }
156 
157  }
158 
159  TEST(SPlotTest, GetBoostWeights)
160  {
161 
162  MVA::GeneralOptions general_options;
163  general_options.m_variables = {"A"};
164  TestDataset dataset(general_options);
165 
166  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
167 
168  auto boost_weights = MVA::getBoostWeights(dataset, binning);
169 
170  EXPECT_EQ(boost_weights.size(), 40);
171  for (unsigned int i = 0; i < 10; i += 2) {
172  EXPECT_FLOAT_EQ(boost_weights[i], 0.2 / 0.3 / 4.0);
173  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.8 / 0.3 / 4.0);
174  }
175  for (unsigned int i = 10; i < 20; i += 2) {
176  EXPECT_FLOAT_EQ(boost_weights[i], 0.5 / 0.2 / 4.0);
177  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.5 / 0.2 / 4.0);
178  }
179  for (unsigned int i = 20; i < 28; i += 2) {
180  EXPECT_FLOAT_EQ(boost_weights[i], 0.8 / 0.2 / 4.0);
181  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.2 / 0.2 / 4.0);
182  }
183  EXPECT_FLOAT_EQ(boost_weights[28], 1.0 / 0.3 / 4.0);
184  EXPECT_FLOAT_EQ(boost_weights[29], 0.0 / 0.3 / 4.0);
185  EXPECT_FLOAT_EQ(boost_weights[30], 0.8 / 0.2 / 4.0);
186  EXPECT_FLOAT_EQ(boost_weights[31], 0.2 / 0.2 / 4.0);
187  for (unsigned int i = 32; i < 40; i += 2) {
188  EXPECT_FLOAT_EQ(boost_weights[i], 1.0 / 0.3 / 4.0);
189  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.0 / 0.3 / 4.0);
190  }
191 
192  }
193 
194  TEST(SPlotTest, GetAPlotWeights)
195  {
196 
197  MVA::GeneralOptions general_options;
198  general_options.m_variables = {"A"};
199  TestDataset dataset(general_options);
200 
201  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
202 
203  std::vector<float> boost_prediction = {0.0, 0.005, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
204  0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.9, 0.995, 1.0
205  };
206  auto aplot_weights = MVA::getAPlotWeights(dataset, binning, boost_prediction);
207 
208  // Regularisation
209  boost_prediction[0] = 0.005;
210  boost_prediction[19] = 0.995;
211 
212  auto splot_weights = MVA::getSPlotWeights(dataset, binning);
213 
214  EXPECT_EQ(aplot_weights.size(), 40);
215  for (unsigned int i = 0; i < 10; i += 2) {
216  double aplot = 0.1 / boost_prediction[i / 2] + 0.4 / (1 - boost_prediction[i / 2]);
217  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
218  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
219  }
220  for (unsigned int i = 10; i < 20; i += 2) {
221  double aplot = 0.25 / boost_prediction[i / 2] + 0.25 / (1 - boost_prediction[i / 2]);
222  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
223  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
224  }
225  for (unsigned int i = 20; i < 28; i += 2) {
226  double aplot = 0.4 / boost_prediction[i / 2] + 0.1 / (1 - boost_prediction[i / 2]);
227  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
228  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
229  }
230  {
231  double aplot = 0.5 / boost_prediction[14];
232  EXPECT_FLOAT_EQ(aplot_weights[28], aplot * splot_weights[28]);
233  EXPECT_FLOAT_EQ(aplot_weights[29], aplot * splot_weights[29]);
234  aplot = 0.4 / boost_prediction[15] + 0.1 / (1 - boost_prediction[15]);
235  EXPECT_FLOAT_EQ(aplot_weights[30], aplot * splot_weights[30]);
236  EXPECT_FLOAT_EQ(aplot_weights[31], aplot * splot_weights[31]);
237  }
238  for (unsigned int i = 32; i < 40; i += 2) {
239  double aplot = 0.5 / boost_prediction[i / 2];
240  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
241  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
242  }
243 
244  }
245 
246 }
Belle2::MVA::Binning::m_bckgrd_yield
double m_bckgrd_yield
Background yield in data distribution.
Definition: Binning.h:56
Belle2::MVA::Binning::m_boundaries
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:63
Belle2::MVA::Dataset
Abstract base class of all Datasets given to the MVA interface. The current event can always be accessed...
Definition: Dataset.h:34
Belle2::MVA::Binning::m_bckgrd_pdf
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:60
Belle2::MVA::SPlotDataset
Dataset for sPlot. Wraps a dataset and provides each data-point twice, once as signal and once as background...
Definition: DataDriven.h:163
Belle2::MVA::Binning::CreateEquidistant
static Binning CreateEquidistant(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equidistant binning.
Definition: Binning.cc:149
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::ReweightingDataset
Dataset for reweighting. Wraps a dataset and provides each data-point with a new weight.
Definition: DataDriven.h:31
Belle2::MVA::GeneralOptions
General options which are shared by all MVA trainings.
Definition: Options.h:64
Belle2::TEST
TEST(TestgetDetectorRegion, TestgetDetectorRegion)
Test Constructors.
Definition: utilityFunctions.cc:18
Belle2::MVA::Binning
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:29
Belle2::MVA::Binning::m_signal_pdf
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:58
Belle2::MVA::Binning::m_signal_yield
double m_signal_yield
Signal yield in data distribution.
Definition: Binning.h:55