Belle II Software  release-08-01-10
test_DataDriven.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <mva/utility/DataDriven.h>
10 #include <mva/interface/Interface.h>
11 #include <framework/utilities/FileSystem.h>
12 #include <framework/utilities/TestHelpers.h>
13 
14 #include <gtest/gtest.h>
15 #include <numeric>
16 
17 using namespace Belle2;
18 
19 namespace {
20 
21  class TestDataset : public MVA::Dataset {
22  public:
23  explicit TestDataset(MVA::GeneralOptions& general_options) : MVA::Dataset(general_options)
24  {
25  m_input = {0.0, 0.0};
26  m_target = 0.0;
27  m_isSignal = false;
28  m_weight = 1.0;
29  // Suppress cppcheck remark
30  // performance: Variable 'm_a' is assigned in constructor body. Consider performing initialization in initialization list.
31  // Initializing this vector in the initialization list is not readable, and performance is negligible here
32  // cppcheck-suppress *
33  m_a = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 3.0, 4.0, 4.0, 4.0, 4.0};
34  }
35 
36  [[nodiscard]] unsigned int getNumberOfFeatures() const override { return 1; }
37  [[nodiscard]] unsigned int getNumberOfSpectators() const override { return 0; }
38  [[nodiscard]] unsigned int getNumberOfEvents() const override { return 20; }
39  void loadEvent(unsigned int iEvent) override { m_input[0] = m_a[iEvent]; m_target = iEvent % 2; m_isSignal = m_target == 1; };
40  float getSignalFraction() override { return 0.5; };
41  std::vector<float> getFeature(unsigned int) override { return m_a; }
42 
43  std::vector<float> m_a;
44 
45  };
46 
47  TEST(SPlotTest, SPlotDataset)
48  {
49 
50  MVA::GeneralOptions general_options;
51  general_options.m_variables = {"A"};
52  TestDataset dataset(general_options);
53 
54  std::vector<float> weights(40);
55  std::iota(weights.begin(), weights.end(), 0.0);
56  MVA::SPlotDataset splot_dataset(general_options, dataset, weights, 0.5);
57 
58  EXPECT_EQ(splot_dataset.getNumberOfFeatures(), 1);
59  EXPECT_EQ(splot_dataset.getNumberOfEvents(), 40);
60  EXPECT_EQ(splot_dataset.getSignalFraction(), 0.5);
61 
62  auto feature = dataset.getFeature(0);
63  for (unsigned int i = 0; i < 40; ++i) {
64  splot_dataset.loadEvent(i);
65  EXPECT_FLOAT_EQ(splot_dataset.m_input[0], feature[i / 2]);
66  EXPECT_FLOAT_EQ(splot_dataset.m_weight, 1.0 * i);
67  EXPECT_EQ(splot_dataset.m_isSignal, (i % 2) == 0);
68  }
69 
70  }
71 
72  TEST(ReweightingTest, ReweightingDataset)
73  {
74 
75  MVA::GeneralOptions general_options;
76  general_options.m_variables = {"A"};
77  TestDataset dataset(general_options);
78 
79  std::vector<float> weights(20);
80  std::iota(weights.begin(), weights.end(), 0.0);
81  MVA::ReweightingDataset reweighting_dataset(general_options, dataset, weights);
82 
83  EXPECT_EQ(reweighting_dataset.getNumberOfFeatures(), 1);
84  EXPECT_EQ(reweighting_dataset.getNumberOfEvents(), 20);
85 
86  auto feature = dataset.getFeature(0);
87  for (unsigned int i = 0; i < 20; ++i) {
88  reweighting_dataset.loadEvent(i);
89  EXPECT_FLOAT_EQ(reweighting_dataset.m_input[0], feature[i]);
90  EXPECT_FLOAT_EQ(reweighting_dataset.m_weight, 1.0 * i);
91  EXPECT_EQ(reweighting_dataset.m_isSignal, (i % 2) == 1);
92  }
93 
94  }
95 
96  TEST(SPlotTest, GetSPlotWeights)
97  {
98 
99  MVA::GeneralOptions general_options;
100  general_options.m_variables = {"A", "D"};
101  TestDataset dataset(general_options);
102 
103  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
104 
105  EXPECT_EQ(binning.m_boundaries.size(), 5);
106  EXPECT_FLOAT_EQ(binning.m_boundaries[0], 1.0);
107  EXPECT_FLOAT_EQ(binning.m_boundaries[1], 1.75);
108  EXPECT_FLOAT_EQ(binning.m_boundaries[2], 2.5);
109  EXPECT_FLOAT_EQ(binning.m_boundaries[3], 3.25);
110  EXPECT_FLOAT_EQ(binning.m_boundaries[4], 4.0);
111 
112  EXPECT_EQ(binning.m_signal_pdf.size(), 4);
113  EXPECT_FLOAT_EQ(binning.m_signal_pdf[0], 0.2 * 4.0);
114  EXPECT_FLOAT_EQ(binning.m_signal_pdf[1], 0.3 * 4.0);
115  EXPECT_FLOAT_EQ(binning.m_signal_pdf[2], 0.3 * 4.0);
116  EXPECT_FLOAT_EQ(binning.m_signal_pdf[3], 0.2 * 4.0);
117 
118  EXPECT_EQ(binning.m_bckgrd_pdf.size(), 4);
119  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[0], 0.3 * 4.0);
120  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[1], 0.2 * 4.0);
121  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[2], 0.2 * 4.0);
122  EXPECT_FLOAT_EQ(binning.m_bckgrd_pdf[3], 0.3 * 4.0);
123 
124  EXPECT_FLOAT_EQ(binning.m_signal_yield, 10);
125  EXPECT_FLOAT_EQ(binning.m_bckgrd_yield, 10);
126 
127  auto splot_weights = MVA::getSPlotWeights(dataset, binning);
128 
129  double sum = 0;
130  for (auto& s : splot_weights)
131  sum += s;
132  EXPECT_FLOAT_EQ(sum, 20.0);
133 
134  EXPECT_EQ(splot_weights.size(), 40);
135  for (unsigned int i = 0; i < 10; i += 2) {
136  EXPECT_FLOAT_EQ(splot_weights[i], -2.0);
137  EXPECT_FLOAT_EQ(splot_weights[i + 1], 3.0);
138  }
139  for (unsigned int i = 10; i < 20; i += 2) {
140  EXPECT_FLOAT_EQ(splot_weights[i], 3.0);
141  EXPECT_FLOAT_EQ(splot_weights[i + 1], -2.0);
142  }
143  for (unsigned int i = 20; i < 28; i += 2) {
144  EXPECT_FLOAT_EQ(splot_weights[i], 3.0);
145  EXPECT_FLOAT_EQ(splot_weights[i + 1], -2.0);
146  }
147  EXPECT_FLOAT_EQ(splot_weights[28], -2.0);
148  EXPECT_FLOAT_EQ(splot_weights[29], 3.0);
149  EXPECT_FLOAT_EQ(splot_weights[30], 3.0);
150  EXPECT_FLOAT_EQ(splot_weights[31], -2.0);
151  for (unsigned int i = 32; i < 40; i += 2) {
152  EXPECT_FLOAT_EQ(splot_weights[i], -2.0);
153  EXPECT_FLOAT_EQ(splot_weights[i + 1], 3.0);
154  }
155 
156  }
157 
158  TEST(SPlotTest, GetBoostWeights)
159  {
160 
161  MVA::GeneralOptions general_options;
162  general_options.m_variables = {"A"};
163  TestDataset dataset(general_options);
164 
165  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
166 
167  auto boost_weights = MVA::getBoostWeights(dataset, binning);
168 
169  EXPECT_EQ(boost_weights.size(), 40);
170  for (unsigned int i = 0; i < 10; i += 2) {
171  EXPECT_FLOAT_EQ(boost_weights[i], 0.2 / 0.3 / 4.0);
172  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.8 / 0.3 / 4.0);
173  }
174  for (unsigned int i = 10; i < 20; i += 2) {
175  EXPECT_FLOAT_EQ(boost_weights[i], 0.5 / 0.2 / 4.0);
176  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.5 / 0.2 / 4.0);
177  }
178  for (unsigned int i = 20; i < 28; i += 2) {
179  EXPECT_FLOAT_EQ(boost_weights[i], 0.8 / 0.2 / 4.0);
180  // cppcheck-suppress duplicateExpression
181  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.2 / 0.2 / 4.0);
182  }
183  EXPECT_FLOAT_EQ(boost_weights[28], 1.0 / 0.3 / 4.0);
184  EXPECT_FLOAT_EQ(boost_weights[29], 0.0 / 0.3 / 4.0);
185  EXPECT_FLOAT_EQ(boost_weights[30], 0.8 / 0.2 / 4.0);
186  // cppcheck-suppress duplicateExpression
187  EXPECT_FLOAT_EQ(boost_weights[31], 0.2 / 0.2 / 4.0);
188  for (unsigned int i = 32; i < 40; i += 2) {
189  EXPECT_FLOAT_EQ(boost_weights[i], 1.0 / 0.3 / 4.0);
190  EXPECT_FLOAT_EQ(boost_weights[i + 1], 0.0 / 0.3 / 4.0);
191  }
192 
193  }
194 
195  TEST(SPlotTest, GetAPlotWeights)
196  {
197 
198  MVA::GeneralOptions general_options;
199  general_options.m_variables = {"A"};
200  TestDataset dataset(general_options);
201 
202  MVA::Binning binning = MVA::Binning::CreateEquidistant(dataset.getFeature(0), dataset.getWeights(), dataset.getSignals(), 4);
203 
204  std::vector<float> boost_prediction = {0.0, 0.005, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
205  0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.9, 0.995, 1.0
206  };
207  auto aplot_weights = MVA::getAPlotWeights(dataset, binning, boost_prediction);
208 
209  // Regularisation
210  boost_prediction[0] = 0.005;
211  boost_prediction[19] = 0.995;
212 
213  auto splot_weights = MVA::getSPlotWeights(dataset, binning);
214 
215  EXPECT_EQ(aplot_weights.size(), 40);
216  for (unsigned int i = 0; i < 10; i += 2) {
217  double aplot = 0.1 / boost_prediction[i / 2] + 0.4 / (1 - boost_prediction[i / 2]);
218  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
219  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
220  }
221  for (unsigned int i = 10; i < 20; i += 2) {
222  double aplot = 0.25 / boost_prediction[i / 2] + 0.25 / (1 - boost_prediction[i / 2]);
223  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
224  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
225  }
226  for (unsigned int i = 20; i < 28; i += 2) {
227  double aplot = 0.4 / boost_prediction[i / 2] + 0.1 / (1 - boost_prediction[i / 2]);
228  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
229  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
230  }
231  {
232  double aplot = 0.5 / boost_prediction[14];
233  EXPECT_FLOAT_EQ(aplot_weights[28], aplot * splot_weights[28]);
234  EXPECT_FLOAT_EQ(aplot_weights[29], aplot * splot_weights[29]);
235  aplot = 0.4 / boost_prediction[15] + 0.1 / (1 - boost_prediction[15]);
236  EXPECT_FLOAT_EQ(aplot_weights[30], aplot * splot_weights[30]);
237  EXPECT_FLOAT_EQ(aplot_weights[31], aplot * splot_weights[31]);
238  }
239  for (unsigned int i = 32; i < 40; i += 2) {
240  double aplot = 0.5 / boost_prediction[i / 2];
241  EXPECT_FLOAT_EQ(aplot_weights[i], aplot * splot_weights[i]);
242  EXPECT_FLOAT_EQ(aplot_weights[i + 1], aplot * splot_weights[i + 1]);
243  }
244 
245  }
246 
247 }
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:27
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:58
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:56
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:61
static Binning CreateEquidistant(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equidistant binning.
Definition: Binning.cc:139
double m_bckgrd_yield
Background yield in data distribution.
Definition: Binning.h:54
double m_signal_yield
Signal yield in data distribution.
Definition: Binning.h:53
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:33
General options which are shared by all MVA trainings.
Definition: Options.h:62
Dataset for Reweighting. Wraps a dataset and provides each data-point with a new weight.
Definition: DataDriven.h:29
Dataset for sPlot. Wraps a dataset and provides each data-point twice, once as signal and once as background.
Definition: DataDriven.h:161
TEST(TestgetDetectorRegion, TestgetDetectorRegion)
Test Constructors.
Abstract base class for different kinds of events.