Belle II Software development
Binning Class Reference

Binning of a data distribution. Provides PDF and CDF values of the distribution per bin. More...

#include <Binning.h>

Public Member Functions

 Binning (unsigned int nBins=0)
 Creates an empty binning with nBins.
 
unsigned int getBin (float datapoint) const
 Gets the bin corresponding to the given datapoint.
 
void normalizePDFs ()
 Normalizes the PDF values, so their sum is 1.
 
void calculateCDFsFromPDFs ()
 Calculates the CDF values from the pdf values, which are assumed to be normalized.
 

Static Public Member Functions

static Binning CreateEqualFrequency (const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
 Create an equal frequency (aka equal-statistics) binning.
 
static Binning CreateEquidistant (const std::vector< float > &data, const std::vector< float > &weights, const std::vector< bool > &isSignal, unsigned int nBins)
 Create an equidistant binning.
 

Public Attributes

double m_signal_yield
 Signal yield in data distribution.
 
double m_bckgrd_yield
 Background yield in data distribution.
 
std::vector< float > m_signal_pdf
 Signal pdf of data distribution per bin.
 
std::vector< float > m_signal_cdf
 Signal cdf of data distribution per bin.
 
std::vector< float > m_bckgrd_pdf
 Background pdf of data distribution per bin.
 
std::vector< float > m_bckgrd_cdf
 Background cdf of data distribution per bin.
 
std::vector< float > m_boundaries
 Boundaries of data distribution, including minimum and maximum value as first and last boundary.
 

Detailed Description

Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.

Definition at line 27 of file Binning.h.

Constructor & Destructor Documentation

◆ Binning()

Binning ( unsigned int  nBins = 0)
explicit

Creates an empty binning with nBins.

Parameters
nBins: number of bins

Definition at line 21 of file Binning.cc.

22 {
23
24 m_signal_pdf.resize(nBins, 0.0);
25 m_signal_cdf.resize(nBins, 0.0);
26 m_bckgrd_pdf.resize(nBins, 0.0);
27 m_bckgrd_cdf.resize(nBins, 0.0);
28 m_boundaries.resize(nBins + 1, 0.0);
29
32 }
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
Definition: Binning.h:58
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
Definition: Binning.h:56
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
Definition: Binning.h:61
std::vector< float > m_bckgrd_cdf
Background cdf of data distribution per bin.
Definition: Binning.h:59
double m_bckgrd_yield
Background yield in data distribution.
Definition: Binning.h:54
double m_signal_yield
Signal yield in data distribution.
Definition: Binning.h:53
std::vector< float > m_signal_cdf
Signal cdf of data distribution per bin.
Definition: Binning.h:57

Member Function Documentation

◆ calculateCDFsFromPDFs()

void calculateCDFsFromPDFs ( )

Calculates the CDF values from the pdf values, which are assumed to be normalized.

Definition at line 73 of file Binning.cc.

74 {
75
76 unsigned int nBins = m_signal_pdf.size();
77
80
81 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
82 m_signal_cdf[iBin] *= (m_boundaries[iBin + 1] - m_boundaries[iBin]) / (m_boundaries[nBins] - m_boundaries[0]);
83 m_bckgrd_cdf[iBin] *= (m_boundaries[iBin + 1] - m_boundaries[iBin]) / (m_boundaries[nBins] - m_boundaries[0]);
84 }
85
86 for (unsigned int iBin = 1; iBin < nBins; ++iBin) {
87 m_signal_cdf[iBin] += m_signal_cdf[iBin - 1];
88 m_bckgrd_cdf[iBin] += m_bckgrd_cdf[iBin - 1];
89 }
90
91 }

◆ CreateEqualFrequency()

Binning CreateEqualFrequency ( const std::vector< float > &  data,
const std::vector< float > &  weights,
const std::vector< bool > &  isSignal,
unsigned int  nBins 
)
static

Create an equal frequency (aka equal-statistics) binning.

Parameters
data: data-points sampled from the distribution
weights: weights for each data-point
isSignal: per data point, whether it is signal (true) or background (false)
nBins: number of bins

Definition at line 93 of file Binning.cc.

95 {
96
97 Binning binning(nBins);
98
99 unsigned int nEvents = data.size();
100
101 std::vector<unsigned int> indices(nEvents);
102 std::iota(indices.begin(), indices.end(), 0);
103 std::sort(indices.begin(), indices.end(), [&](unsigned int i, unsigned int j) {return data[i] < data[j]; });
104
105 double sum_weights = 0;
106 for (auto& w : weights)
107 sum_weights += w;
108 double weight_per_bin = sum_weights / nBins;
109
110 unsigned int bin = 1;
111 double current_weight = 0;
112 binning.m_boundaries[0] = data[indices[0]];
113 binning.m_boundaries[nBins] = data[indices[nEvents - 1]];
114
115 for (unsigned int iEvent = 0; iEvent < nEvents; ++iEvent) {
116 unsigned int index = indices[iEvent];
117 current_weight += weights[index];
118 if (current_weight >= weight_per_bin and bin < nBins and binning.m_boundaries[bin - 1] < data[index]) {
119 auto number_of_bins = static_cast<unsigned int>(current_weight / weight_per_bin);
120 current_weight -= weight_per_bin * number_of_bins;
121 for (unsigned int i = 0; i < number_of_bins; ++i) {
122 binning.m_boundaries[bin] = data[index];
123 bin++;
124 }
125 }
126 if (isSignal[index]) {
127 binning.m_signal_pdf[bin - 1] += weights[index];
128 } else {
129 binning.m_bckgrd_pdf[bin - 1] += weights[index];
130 }
131 }
132
133 binning.normalizePDFs();
134 binning.calculateCDFsFromPDFs();
135
136 return binning;
137 }
Binning(unsigned int nBins=0)
Creates an empty binning with nBins.
Definition: Binning.cc:21

◆ CreateEquidistant()

Binning CreateEquidistant ( const std::vector< float > &  data,
const std::vector< float > &  weights,
const std::vector< bool > &  isSignal,
unsigned int  nBins 
)
static

Create an equidistant binning.

Parameters
data: data-points sampled from the distribution
weights: weights for each data-point
isSignal: per data point, whether it is signal (true) or background (false)
nBins: number of bins

Definition at line 139 of file Binning.cc.

141 {
142
143 Binning binning(nBins);
144
145 auto minmax = std::minmax_element(data.begin(), data.end());
146 float min = *(minmax.first);
147 float max = *(minmax.second);
148 float step = (max - min) / nBins;
149
150 for (unsigned int iBin = 0; iBin <= nBins; ++iBin) {
151 binning.m_boundaries[iBin] = min + step * iBin;
152 }
153
154 for (unsigned int iEvent = 0; iEvent < data.size(); ++iEvent) {
155 unsigned int bin = binning.getBin(data[iEvent]);
156
157 if (isSignal[iEvent])
158 binning.m_signal_pdf[bin] += weights[iEvent];
159 else
160 binning.m_bckgrd_pdf[bin] += weights[iEvent];
161
162 }
163
164 binning.normalizePDFs();
165 binning.calculateCDFsFromPDFs();
166
167 return binning;
168
169 }

◆ getBin()

unsigned int getBin ( float  datapoint) const

Gets the bin corresponding to the given datapoint.

There are no overflow/underflow bins, so data points below the original range are mapped to the first bin and data points above it are mapped to the last bin.

Parameters
datapoint: data point for which the bin is returned

Definition at line 34 of file Binning.cc.

35 {
36
37 auto it = std::upper_bound(m_boundaries.begin(), m_boundaries.end(), datapoint);
38 unsigned int bin = std::distance(m_boundaries.begin(), it);
39 if (bin == 0)
40 bin = 1;
41 if (bin == m_boundaries.size())
42 bin = m_boundaries.size() - 1;
43 return bin - 1;
44
45 }

◆ normalizePDFs()

void normalizePDFs ( )

Normalizes the PDF values, so their sum is 1.

Definition at line 47 of file Binning.cc.

48 {
49
50 unsigned int nBins = m_signal_pdf.size();
51
54
55 // Total number of events
56 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
59 }
60
61 // Each bin is normed to its width
62 double last_valid_bound = m_boundaries[0];
63 for (unsigned int iBin = 0; iBin < nBins; ++iBin) {
64 m_signal_pdf[iBin] /= m_signal_yield * (m_boundaries[iBin + 1] - last_valid_bound) / (m_boundaries[nBins] - m_boundaries[0]);
65 m_bckgrd_pdf[iBin] /= m_bckgrd_yield * (m_boundaries[iBin + 1] - last_valid_bound) / (m_boundaries[nBins] - m_boundaries[0]);
66 if (iBin + 1 < nBins and m_boundaries[iBin + 2] > m_boundaries[iBin + 1]) {
67 last_valid_bound = m_boundaries[iBin + 1];
68 }
69 }
70
71 }

Member Data Documentation

◆ m_bckgrd_cdf

std::vector<float> m_bckgrd_cdf

Background cdf of data distribution per bin.

Definition at line 59 of file Binning.h.

◆ m_bckgrd_pdf

std::vector<float> m_bckgrd_pdf

Background pdf of data distribution per bin.

Definition at line 58 of file Binning.h.

◆ m_bckgrd_yield

double m_bckgrd_yield

Background yield in data distribution.

Definition at line 54 of file Binning.h.

◆ m_boundaries

std::vector<float> m_boundaries

Boundaries of data distribution, including minimum and maximum value as first and last boundary.

Definition at line 61 of file Binning.h.

◆ m_signal_cdf

std::vector<float> m_signal_cdf

Signal cdf of data distribution per bin.

Definition at line 57 of file Binning.h.

◆ m_signal_pdf

std::vector<float> m_signal_pdf

Signal pdf of data distribution per bin.

Definition at line 56 of file Binning.h.

◆ m_signal_yield

double m_signal_yield

Signal yield in data distribution.

Definition at line 53 of file Binning.h.


The documentation for this class was generated from the following files: