9#include <mva/utility/Binning.h>
21 Binning::Binning(
unsigned int nBins)
34 unsigned int Binning::getBin(
float datapoint)
const
38 unsigned int bin = std::distance(
m_boundaries.begin(), it);
47 void Binning::normalizePDFs()
56 for (
unsigned int iBin = 0; iBin < nBins; ++iBin) {
63 for (
unsigned int iBin = 0; iBin < nBins; ++iBin) {
73 void Binning::calculateCDFsFromPDFs()
81 for (
unsigned int iBin = 0; iBin < nBins; ++iBin) {
86 for (
unsigned int iBin = 1; iBin < nBins; ++iBin) {
93 Binning Binning::CreateEqualFrequency(
const std::vector<float>& data,
const std::vector<float>& weights,
94 const std::vector<bool>& isSignal,
unsigned int nBins)
99 unsigned int nEvents = data.size();
101 std::vector<unsigned int> indices(nEvents);
102 std::iota(indices.begin(), indices.end(), 0);
103 std::sort(indices.begin(), indices.end(), [&](
unsigned int i,
unsigned int j) {return data[i] < data[j]; });
105 double sum_weights = 0;
106 for (
auto& w : weights)
108 double weight_per_bin = sum_weights / nBins;
110 unsigned int bin = 1;
111 double current_weight = 0;
113 binning.
m_boundaries[nBins] = data[indices[nEvents - 1]];
115 for (
unsigned int iEvent = 0; iEvent < nEvents; ++iEvent) {
116 unsigned int index = indices[iEvent];
117 current_weight += weights[index];
118 if (current_weight >= weight_per_bin and bin < nBins and binning.
m_boundaries[bin - 1] < data[index]) {
119 auto number_of_bins =
static_cast<unsigned int>(current_weight / weight_per_bin);
120 current_weight -= weight_per_bin * number_of_bins;
121 for (
unsigned int i = 0; i < number_of_bins; ++i) {
126 if (isSignal[index]) {
139 Binning Binning::CreateEquidistant(
const std::vector<float>& data,
const std::vector<float>& weights,
140 const std::vector<bool>& isSignal,
unsigned int nBins)
145 auto minmax = std::minmax_element(data.begin(), data.end());
146 float min = *(minmax.first);
147 float max = *(minmax.second);
148 float step = (max - min) / nBins;
150 for (
unsigned int iBin = 0; iBin <= nBins; ++iBin) {
154 for (
unsigned int iEvent = 0; iEvent < data.size(); ++iEvent) {
155 unsigned int bin = binning.
getBin(data[iEvent]);
157 if (isSignal[iEvent])
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
std::vector< float > m_bckgrd_pdf
Background PDF of data distribution per bin.
std::vector< float > m_signal_pdf
Signal PDF of data distribution per bin.
std::vector< float > m_boundaries
Boundaries of data distribution, including minimum and maximum value as first and last boundary.
std::vector< float > m_bckgrd_cdf
Background CDF of data distribution per bin.
double m_bckgrd_yield
Background yield in data distribution.
double m_signal_yield
Signal yield in data distribution.
std::vector< float > m_signal_cdf
Signal CDF of data distribution per bin.
void calculateCDFsFromPDFs()
Calculates the CDF values from the PDF values, which are assumed to be normalized.
void normalizePDFs()
Normalizes the PDF values, so their sum is 1.
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Abstract base class for different kinds of events.