11 #include <mva/utility/DataDriven.h>
12 #include <mva/utility/Utility.h>
13 #include <mva/methods/PDF.h>
14 #include <mva/methods/Trivial.h>
15 #include <mva/methods/Combination.h>
17 #include <framework/logging/Logger.h>
28 const std::vector<float>& weights) : Dataset(general_options), m_dataset(dataset), m_weights(weights) { }
41 const std::string& sideband_variable) :
Dataset(general_options), m_dataset(dataset)
47 double total_signal_mc = 0.0;
48 double total_mc = 0.0;
49 double sum_signal_sr = 0.0;
51 double sum_signal_br = 0.0;
53 double sum_signal_nr = 0.0;
56 for (
unsigned int iEvent = 0; iEvent < mc_dataset.
getNumberOfEvents(); ++iEvent) {
59 total_signal_mc += mc_dataset.
m_weight;
61 if (mc_dataset.
m_spectators[mc_spectator_index] == 1.0) {
63 sum_signal_sr += mc_dataset.
m_weight;
65 }
else if (mc_dataset.
m_spectators[mc_spectator_index] == 2.0) {
67 sum_signal_br += mc_dataset.
m_weight;
69 }
else if (mc_dataset.
m_spectators[mc_spectator_index] == 3.0) {
71 sum_signal_nr += mc_dataset.
m_weight;
76 double total_data = 0.0;
77 double sum_data_sr = 0.0;
78 double sum_data_br = 0.0;
79 double sum_data_nr = 0.0;
86 }
else if (dataset.
m_spectators[m_spectator_index] == 2.0) {
88 }
else if (dataset.
m_spectators[m_spectator_index] == 3.0) {
93 if (sum_signal_br / sum_br > 0.01) {
94 B2WARNING(
"The background region you defined in the sideband substraction contains more than 1% signal");
96 if (sum_signal_nr / sum_nr > 0.01) {
97 B2WARNING(
"The negative signal region you defined in the sideband substraction contains more than 1% signal");
100 if (sum_data_sr - sum_signal_sr < 0) {
101 B2ERROR(
"There is less data in the signal region than the expected amount of signal events in the signal region estimated from MC.");
104 if (total_data - total_signal_mc < 0) {
105 B2ERROR(
"There is less data than the expected amount of signal events estimated from MC.");
112 m_signal_weight = 1.0;
115 m_background_weight = (total_data - total_signal_mc) / sum_data_br;
119 m_negative_signal_weight = - (sum_data_sr - sum_signal_sr) / sum_data_nr;
121 B2INFO(
"Data " << total_data <<
" " << sum_data_sr <<
" " << sum_data_br <<
" " << sum_data_nr);
122 B2INFO(
"MC " << total_mc <<
" " << sum_sr <<
" " << sum_br <<
" " << sum_nr);
123 B2INFO(
"MC (signal)" << total_signal_mc <<
" " << sum_signal_sr <<
" " << sum_signal_br <<
" " << sum_signal_nr);
124 B2INFO(
"Sideband Substraction: Signal Weight " << m_signal_weight <<
" Background Weight " << m_background_weight <<
125 " Negative Signal Weight " << m_negative_signal_weight);
156 float signalFraction) : Dataset(general_options), m_dataset(dataset), m_weights(weights), m_signalFraction(signalFraction) { }
171 if (event % 2 == 1) {
183 std::vector<float> getSPlotWeights(Dataset& dataset, Binning& binning)
192 auto discriminants = dataset.getFeature(0);
197 double inverse_covariance[3] = {0};
198 for (
auto& v : discriminants) {
199 const unsigned int iBin = binning.getBin(v);
200 double norm = (binning.m_signal_yield * binning.m_signal_pdf[iBin] + binning.m_bckgrd_yield * binning.m_bckgrd_pdf[iBin]);
202 inverse_covariance[0] += binning.m_signal_pdf[iBin] * binning.m_signal_pdf[iBin] / norm;
203 inverse_covariance[1] += binning.m_signal_pdf[iBin] * binning.m_bckgrd_pdf[iBin] / norm;
204 inverse_covariance[2] += binning.m_bckgrd_pdf[iBin] * binning.m_bckgrd_pdf[iBin] / norm;
206 double covariance[3] = {0};
207 double determinante = (inverse_covariance[0] * inverse_covariance[2] - inverse_covariance[1] * inverse_covariance[1]);
208 covariance[0] = inverse_covariance[2] / determinante;
209 covariance[1] = -inverse_covariance[1] / determinante;
210 covariance[2] = inverse_covariance[0] / determinante;
213 std::vector<float> splot_weights;
214 splot_weights.reserve(2 * discriminants.size());
215 for (
auto& v : discriminants) {
216 const unsigned int iBin = binning.getBin(v);
217 double norm = (binning.m_signal_yield * binning.m_signal_pdf[iBin] + binning.m_bckgrd_yield * binning.m_bckgrd_pdf[iBin]);
218 splot_weights.push_back((covariance[0] * binning.m_signal_pdf[iBin] + covariance[1] * binning.m_bckgrd_pdf[iBin]) / norm);
219 splot_weights.push_back((covariance[1] * binning.m_signal_pdf[iBin] + covariance[2] * binning.m_bckgrd_pdf[iBin]) / norm);
222 B2INFO(
"Covariance Matrix of SPlot");
223 B2INFO(covariance[0] <<
" " << covariance[1] <<
" " << covariance[2]);
225 return splot_weights;
229 std::vector<float> getBoostWeights(Dataset& dataset, Binning& binning)
232 std::vector<float> boost_weights;
233 boost_weights.reserve(2 * dataset.getNumberOfEvents());
234 for (
unsigned int iEvent = 0; iEvent < dataset.getNumberOfEvents(); ++iEvent) {
235 dataset.loadEvent(iEvent);
236 const unsigned int bin = binning.getBin(dataset.m_input[0]);
237 boost_weights.push_back(binning.m_signal_cdf[bin] / binning.m_bckgrd_pdf[bin]);
238 boost_weights.push_back((1 - binning.m_signal_cdf[bin]) / binning.m_bckgrd_pdf[bin]);
240 return boost_weights;
244 std::vector<float> getAPlotWeights(Dataset& dataset, Binning& binning,
const std::vector<float>& boost_predictions)
247 std::vector<float> splot_weights = getSPlotWeights(dataset, binning);
248 std::vector<float> aplot_weights;
249 aplot_weights.reserve(2 * dataset.getNumberOfEvents());
251 for (
unsigned int iEvent = 0; iEvent < dataset.getNumberOfEvents(); ++iEvent) {
252 dataset.loadEvent(iEvent);
253 const unsigned int iBin = binning.getBin(dataset.m_input[0]);
255 double reg_boost_prediction = boost_predictions[iEvent];
257 if (reg_boost_prediction > 0.995)
258 reg_boost_prediction = 0.995;
260 if (reg_boost_prediction < 0.005)
261 reg_boost_prediction = 0.005;
263 float aplot_weight = (binning.m_signal_cdf[iBin] / reg_boost_prediction + (1 - binning.m_signal_cdf[iBin]) /
264 (1 - reg_boost_prediction)) / 2.0;
266 aplot_weights.push_back(splot_weights[2 * iEvent] * aplot_weight);
267 aplot_weights.push_back(splot_weights[2 * iEvent + 1] * aplot_weight);
271 return aplot_weights;