9 #include <mva/utility/DataDriven.h>
10 #include <mva/utility/Utility.h>
11 #include <mva/methods/PDF.h>
12 #include <mva/methods/Trivial.h>
13 #include <mva/methods/Combination.h>
15 #include <framework/logging/Logger.h>
26 const std::vector<float>& weights) :
Dataset(general_options), m_dataset(dataset), m_weights(weights) { }
39 const std::string& sideband_variable) :
Dataset(general_options), m_dataset(dataset)
45 double total_signal_mc = 0.0;
46 double total_mc = 0.0;
47 double sum_signal_sr = 0.0;
49 double sum_signal_br = 0.0;
51 double sum_signal_nr = 0.0;
54 for (
unsigned int iEvent = 0; iEvent < mc_dataset.
getNumberOfEvents(); ++iEvent) {
57 total_signal_mc += mc_dataset.
m_weight;
59 if (mc_dataset.
m_spectators[mc_spectator_index] == 1.0) {
61 sum_signal_sr += mc_dataset.
m_weight;
63 }
else if (mc_dataset.
m_spectators[mc_spectator_index] == 2.0) {
65 sum_signal_br += mc_dataset.
m_weight;
67 }
else if (mc_dataset.
m_spectators[mc_spectator_index] == 3.0) {
69 sum_signal_nr += mc_dataset.
m_weight;
74 double total_data = 0.0;
75 double sum_data_sr = 0.0;
76 double sum_data_br = 0.0;
77 double sum_data_nr = 0.0;
91 if (sum_signal_br / sum_br > 0.01) {
92 B2WARNING(
"The background region you defined in the sideband subtraction contains more than 1% signal");
94 if (sum_signal_nr / sum_nr > 0.01) {
95 B2WARNING(
"The negative signal region you defined in the sideband subtraction contains more than 1% signal");
98 if (sum_data_sr - sum_signal_sr < 0) {
99 B2ERROR(
"There is less data in the signal region than the expected amount of signal events in the signal region estimated from MC.");
102 if (total_data - total_signal_mc < 0) {
103 B2ERROR(
"There is less data than the expected amount of signal events estimated from MC.");
119 B2INFO(
"Data " << total_data <<
" " << sum_data_sr <<
" " << sum_data_br <<
" " << sum_data_nr);
120 B2INFO(
"MC " << total_mc <<
" " << sum_sr <<
" " << sum_br <<
" " << sum_nr);
121 B2INFO(
"MC (signal)" << total_signal_mc <<
" " << sum_signal_sr <<
" " << sum_signal_br <<
" " << sum_signal_nr);
154 float signalFraction) :
Dataset(general_options), m_dataset(dataset), m_weights(weights), m_signalFraction(signalFraction) { }
169 if (event % 2 == 1) {
// Builds a vector holding two weights per entry of `discriminants`, computed
// from the per-bin signal and background PDFs of `binning` and a 2x2
// covariance matrix, and returns it.
// NOTE(review): this listing omits several original lines (see the gaps in the
// embedded line numbers). The definitions of `discriminants` and `norm`, and
// the code that fills `inverse_covariance`, are not visible here.
181 std::vector<float> getSPlotWeights(
Dataset& dataset,
const Binning& binning)
// Three entries; the determinant formula below treats them as the elements
// (a, b, c) of a symmetric 2x2 matrix [[a, b], [b, c]].
195 double inverse_covariance[3] = {0};
196 for (
auto& v : discriminants) {
197 const unsigned int iBin = binning.
getBin(v);
// Invert the symmetric 2x2 matrix: determinant = a*c - b*b, and the inverse is
// [[c, -b], [-b, a]] / determinant.
// NOTE(review): no guard against a zero determinant is visible in this listing.
204 double covariance[3] = {0};
205 double determinante = (inverse_covariance[0] * inverse_covariance[2] - inverse_covariance[1] * inverse_covariance[1]);
206 covariance[0] = inverse_covariance[2] / determinante;
207 covariance[1] = -inverse_covariance[1] / determinante;
208 covariance[2] = inverse_covariance[0] / determinante;
// Two weights are appended per discriminant value, hence the factor 2.
211 std::vector<float> splot_weights;
212 splot_weights.reserve(2 * discriminants.size());
213 for (
auto& v : discriminants) {
214 const unsigned int iBin = binning.
getBin(v);
// First weight of the pair (even index): covariance[0] * signal pdf +
// covariance[1] * background pdf, divided by `norm`.
216 splot_weights.push_back((covariance[0] * binning.
m_signal_pdf[iBin] + covariance[1] * binning.
m_bckgrd_pdf[iBin]) / norm);
// Second weight of the pair (odd index): covariance[1] * signal pdf +
// covariance[2] * background pdf, divided by `norm`.
217 splot_weights.push_back((covariance[1] * binning.
m_signal_pdf[iBin] + covariance[2] * binning.
m_bckgrd_pdf[iBin]) / norm);
// Log the three independent entries of the covariance matrix.
220 B2INFO(
"Covariance Matrix of SPlot");
221 B2INFO(covariance[0] <<
" " << covariance[1] <<
" " << covariance[2]);
223 return splot_weights;
227 std::vector<float> getBoostWeights(Dataset& dataset,
const Binning& binning)
230 std::vector<float> boost_weights;
231 boost_weights.reserve(2 * dataset.getNumberOfEvents());
232 for (
unsigned int iEvent = 0; iEvent < dataset.getNumberOfEvents(); ++iEvent) {
233 dataset.loadEvent(iEvent);
234 const unsigned int bin = binning.getBin(dataset.m_input[0]);
235 boost_weights.push_back(binning.m_signal_cdf[bin] / binning.m_bckgrd_pdf[bin]);
236 boost_weights.push_back((1 - binning.m_signal_cdf[bin]) / binning.m_bckgrd_pdf[bin]);
238 return boost_weights;
242 std::vector<float> getAPlotWeights(Dataset& dataset,
const Binning& binning,
const std::vector<float>& boost_predictions)
245 std::vector<float> splot_weights = getSPlotWeights(dataset, binning);
246 std::vector<float> aplot_weights;
247 aplot_weights.reserve(2 * dataset.getNumberOfEvents());
249 for (
unsigned int iEvent = 0; iEvent < dataset.getNumberOfEvents(); ++iEvent) {
250 dataset.loadEvent(iEvent);
251 const unsigned int iBin = binning.getBin(dataset.m_input[0]);
253 double reg_boost_prediction = boost_predictions[iEvent];
255 if (reg_boost_prediction > 0.995)
256 reg_boost_prediction = 0.995;
258 if (reg_boost_prediction < 0.005)
259 reg_boost_prediction = 0.005;
261 float aplot_weight = (binning.m_signal_cdf[iBin] / reg_boost_prediction + (1 - binning.m_signal_cdf[iBin]) /
262 (1 - reg_boost_prediction)) / 2.0;
264 aplot_weights.push_back(splot_weights[2 * iEvent] * aplot_weight);
265 aplot_weights.push_back(splot_weights[2 * iEvent + 1] * aplot_weight);
269 return aplot_weights;
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
std::vector< float > m_bckgrd_pdf
Background pdf of data distribution per bin.
std::vector< float > m_signal_pdf
Signal pdf of data distribution per bin.
double m_bckgrd_yield
Background yield in data distribution.
double m_signal_yield
Signal yield in data distribution.
unsigned int getBin(float datapoint) const
Gets the bin corresponding to the given datapoint.
Abstract base class of all Datasets given to the MVA interface. The current event can always be accessed...
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
std::vector< float > m_spectators
Contains all spectators values of the currently loaded event.
virtual void loadEvent(unsigned int iEvent)=0
Load the event number iEvent.
std::vector< float > m_input
Contains all feature values of the currently loaded event.
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
bool m_isSignal
Defines if the currently loaded event is signal or background.
float m_weight
Contains the weight of the currently loaded event.
virtual unsigned int getSpectatorIndex(const std::string &spectator)
Return index of spectator with the given name.
float m_target
Contains the target value of the currently loaded event.
General options which are shared by all MVA trainings.
ReweightingDataset(const GeneralOptions &general_options, Dataset &dataset, const std::vector< float > &weights)
Constructs a new ReweightingDataset.
std::vector< float > m_weights
sPlot weights
Dataset & m_dataset
Wrapped dataset.
virtual void loadEvent(unsigned int event) override
Load the event number iEvent.
std::vector< float > m_weights
sPlot weights
Dataset & m_dataset
Wrapped dataset.
virtual void loadEvent(unsigned int event) override
Load the event number iEvent.
SPlotDataset(const GeneralOptions &general_options, Dataset &dataset, const std::vector< float > &weights, float signalFraction)
Constructs a new SPlotDataset.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
virtual float getSignalFraction() override
Returns the signal fraction of the whole sample.
float m_signalFraction
Signal fraction.
Dataset & m_dataset
Wrapped dataset.
double m_negative_signal_weight
the weight for negative signal events
double m_signal_weight
the weight for signal events
virtual void loadEvent(unsigned int event) override
Load the event number iEvent.
double m_background_weight
the weight for background events
SidebandDataset(const GeneralOptions &general_options, Dataset &dataset, Dataset &mc_dataset, const std::string &sideband_variable)
Constructs a new SidebandDataset.
int m_spectator_index
spectator containing the sideband variable
Abstract base class for different kinds of events.