Belle II Software  release-05-01-25
Binning.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2016 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Thomas Keck *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <mva/utility/Binning.h>
12 
13 #include <algorithm>
14 #include <numeric>
15 
16 namespace Belle2 {
21  namespace MVA {
22 
23  Binning::Binning(unsigned int nBins)
24  {
25 
26  m_signal_pdf.resize(nBins, 0.0);
27  m_signal_cdf.resize(nBins, 0.0);
28  m_bckgrd_pdf.resize(nBins, 0.0);
29  m_bckgrd_cdf.resize(nBins, 0.0);
30  m_boundaries.resize(nBins + 1, 0.0);
31 
32  m_signal_yield = 0;
33  m_bckgrd_yield = 0;
34  }
35 
36  unsigned int Binning::getBin(float datapoint) const
37  {
38 
39  auto it = std::upper_bound(m_boundaries.begin(), m_boundaries.end(), datapoint);
40  unsigned int bin = std::distance(m_boundaries.begin(), it);
41  if (bin == 0)
42  bin = 1;
43  if (bin == m_boundaries.size())
44  bin = m_boundaries.size() - 1;
45  return bin - 1;
46 
47  }
48 
// NOTE(review): the header line of this definition is missing from this listing. The member
// list at the end of the listing names `void normalizePDFs()` ("Normalizes the PDF values,
// so their sum is 1"), so this is presumably the body of Binning::normalizePDFs() -
// confirm against the original file.
50  {
51 
52  unsigned int nBins = m_signal_pdf.size();
53 
// Both yields are recomputed from scratch on every call.
54  m_signal_yield = 0;
55  m_bckgrd_yield = 0;
56 
57  // Total number of events
// NOTE(review): the body of this loop is absent in this listing (the embedded numbering jumps
// from 58 to 61). Presumably it adds m_signal_pdf[iBin] / m_bckgrd_pdf[iBin] to the
// respective yield - confirm. As shown, both yields stay 0 and the divisions below
// would divide by zero.
58  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
61  }
62 
63  // Each bin is normed to its width
// Each entry becomes (content / yield) divided by (bin width / total range).
// The bin width is measured from last_valid_bound instead of the bin's own lower boundary.
64  double last_valid_bound = m_boundaries[0];
65  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
66  m_signal_pdf[iBin] /= m_signal_yield * (m_boundaries[iBin + 1] - last_valid_bound) / (m_boundaries[nBins] - m_boundaries[0]);
67  m_bckgrd_pdf[iBin] /= m_bckgrd_yield * (m_boundaries[iBin + 1] - last_valid_bound) / (m_boundaries[nBins] - m_boundaries[0]);
// The reference bound only advances if the next bin has a non-zero width. A following
// zero-width bin is therefore measured from the same reference bound as this bin,
// which avoids a zero width in its divisor.
68  if (iBin + 1 < nBins and m_boundaries[iBin + 2] > m_boundaries[iBin + 1]) {
69  last_valid_bound = m_boundaries[iBin + 1];
70  }
71  }
72 
73  }
74 
// NOTE(review): the header line of this definition is missing from this listing. The member
// list at the end of the listing names `void calculateCDFsFromPDFs()` ("Calculates the CDF
// values from the pdf values, which are assumed to be normalized"), so this is presumably
// the body of Binning::calculateCDFsFromPDFs() - confirm against the original file.
76  {
77 
78  unsigned int nBins = m_signal_pdf.size();
79 
// NOTE(review): two lines are absent here (the embedded numbering jumps from 79 to 82).
// Presumably they copy m_signal_pdf / m_bckgrd_pdf into m_signal_cdf / m_bckgrd_cdf,
// since the loops below only modify the cdf vectors in place - confirm.
82 
// Scale every entry by the relative width of its bin (bin width / total range).
// Unlike the pdf normalization, the bin's own lower boundary is used here.
83  for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
84  m_signal_cdf[iBin] *= (m_boundaries[iBin + 1] - m_boundaries[iBin]) / (m_boundaries[nBins] - m_boundaries[0]);
85  m_bckgrd_cdf[iBin] *= (m_boundaries[iBin + 1] - m_boundaries[iBin]) / (m_boundaries[nBins] - m_boundaries[0]);
86  }
87 
// Running sum: afterwards entry iBin holds the sum of the entries 0..iBin.
88  for (unsigned int iBin = 1; iBin < nBins; ++iBin) {
89  m_signal_cdf[iBin] += m_signal_cdf[iBin - 1];
90  m_bckgrd_cdf[iBin] += m_bckgrd_cdf[iBin - 1];
91  }
92 
93  }
94 
95  Binning Binning::CreateEqualFrequency(const std::vector<float>& data, const std::vector<float>& weights,
96  const std::vector<bool>& isSignal, unsigned int nBins)
97  {
98 
99  Binning binning(nBins);
100 
101  unsigned int nEvents = data.size();
102 
103  std::vector<unsigned int> indices(nEvents);
104  std::iota(indices.begin(), indices.end(), 0);
105  std::sort(indices.begin(), indices.end(), [&](unsigned int i, unsigned int j) {return data[i] < data[j]; });
106 
107  double sum_weights = 0;
108  for (auto& w : weights)
109  sum_weights += w;
110  double weight_per_bin = sum_weights / nBins;
111 
112  unsigned int bin = 1;
113  double current_weight = 0;
114  binning.m_boundaries[0] = data[indices[0]];
115  binning.m_boundaries[nBins] = data[indices[nEvents - 1]];
116 
117  for (unsigned int iEvent = 0; iEvent < nEvents; ++iEvent) {
118  unsigned int index = indices[iEvent];
119  current_weight += weights[index];
120  if (current_weight >= weight_per_bin and bin < nBins and binning.m_boundaries[bin - 1] < data[index]) {
121  auto number_of_bins = static_cast<unsigned int>(current_weight / weight_per_bin);
122  current_weight -= weight_per_bin * number_of_bins;
123  for (unsigned int i = 0; i < number_of_bins; ++i) {
124  binning.m_boundaries[bin] = data[index];
125  bin++;
126  }
127  }
128  if (isSignal[index]) {
129  binning.m_signal_pdf[bin - 1] += weights[index];
130  } else {
131  binning.m_bckgrd_pdf[bin - 1] += weights[index];
132  }
133  }
134 
135  binning.normalizePDFs();
136  binning.calculateCDFsFromPDFs();
137 
138  return binning;
139  }
140 
141  Binning Binning::CreateEquidistant(const std::vector<float>& data, const std::vector<float>& weights,
142  const std::vector<bool>& isSignal, unsigned int nBins)
143  {
144 
145  Binning binning(nBins);
146 
147  auto minmax = std::minmax_element(data.begin(), data.end());
148  float min = *(minmax.first);
149  float max = *(minmax.second);
150  float step = (max - min) / nBins;
151 
152  for (unsigned int iBin = 0; iBin <= nBins; ++iBin) {
153  binning.m_boundaries[iBin] = min + step * iBin;
154  }
155 
156  for (unsigned int iEvent = 0; iEvent < data.size(); ++iEvent) {
157  unsigned int bin = binning.getBin(data[iEvent]);
158 
159  if (isSignal[iEvent])
160  binning.m_signal_pdf[bin] += weights[iEvent];
161  else
162  binning.m_bckgrd_pdf[bin] += weights[iEvent];
163 
164  }
165 
166  binning.normalizePDFs();
167  binning.calculateCDFsFromPDFs();
168 
169  return binning;
170 
171  }
172 
173  }
175 }
176 
Belle2::MVA::Binning::m_bckgrd_yield
double m_bckgrd_yield
Background yield in data distribution.
Definition: Binning.h:56
Belle2::MVA::Binning::m_boundaries
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:63
Belle2::MVA::Binning::getBin
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Definition: Binning.cc:44
Belle2::MVA::Binning::calculateCDFsFromPDFs
void calculateCDFsFromPDFs()
Calculates the CDF values from the pdf values, which are assumed to be normalized.
Definition: Binning.cc:83
Belle2::MVA::Binning::CreateEqualFrequency
static Binning CreateEqualFrequency(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equal frequency (aka equal-statistics) binning.
Definition: Binning.cc:103
Belle2::MVA::Binning::m_bckgrd_pdf
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:60
Belle2::MVA::Binning::CreateEquidistant
static Binning CreateEquidistant(const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
Create an equidistant binning.
Definition: Binning.cc:149
Belle2::MVA::Binning::normalizePDFs
void normalizePDFs()
Normalizes the PDF values, so their sum is 1.
Definition: Binning.cc:57
Belle2::MVA::Binning::m_signal_cdf
std::vector< float > m_signal_cdf
Signal cdf of data distribution per bin.
Definition: Binning.h:59
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::Binning::m_bckgrd_cdf
std::vector< float > m_bckgrd_cdf
Background cdf of data distribution per bin.
Definition: Binning.h:61
Belle2::MVA::Binning::Binning
Binning(unsigned int nBins=0)
Creates an empty binning with nBins.
Definition: Binning.cc:31
Belle2::MVA::Binning::m_signal_pdf
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:58
Belle2::MVA::Binning::m_signal_yield
double m_signal_yield
Signal yield in data distribution.
Definition: Binning.h:55