Belle II Software  release-08-01-10
DataDriven.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 #ifndef INCLUDE_GUARD_BELLE2_MVA_SPLOT_HEADER
11 #define INCLUDE_GUARD_BELLE2_MVA_SPLOT_HEADER
12 
13 #include <mva/interface/Dataset.h>
14 #include <mva/utility/Binning.h>
15 #include <mva/interface/Expert.h>
16 
17 
18 namespace Belle2 {
23  namespace MVA {
24 
29  class ReweightingDataset : public Dataset {
30 
31  public:
38  ReweightingDataset(const GeneralOptions& general_options, Dataset& dataset, const std::vector<float>& weights);
39 
43  virtual unsigned int getNumberOfFeatures() const override { return m_dataset.getNumberOfFeatures(); }
44 
48  virtual unsigned int getNumberOfSpectators() const override { return m_dataset.getNumberOfSpectators(); }
49 
53  virtual unsigned int getNumberOfEvents() const override { return m_dataset.getNumberOfEvents(); };
54 
59  virtual std::vector<float> getFeature(unsigned int iFeature) override { return m_dataset.getFeature(iFeature); }
60 
65  virtual std::vector<float> getSpectator(unsigned int iSpectator) override { return m_dataset.getSpectator(iSpectator); }
66 
70  virtual std::vector<float> getWeights() override
71  {
72  auto weights = m_dataset.getWeights();
73  for (unsigned int iEvent = 0; iEvent < weights.size(); ++iEvent) {
74  weights[iEvent] *= m_weights[iEvent];
75  }
76  return weights;
77  }
78 
82  virtual std::vector<float> getTargets() override { return m_dataset.getTargets(); }
83 
87  virtual std::vector<bool> getSignals() override { return m_dataset.getSignals(); }
88 
93  virtual void loadEvent(unsigned int event) override;
94 
95  private:
97  std::vector<float> m_weights;
98  };
99 
104  class SidebandDataset : public Dataset {
105 
106  public:
114  SidebandDataset(const GeneralOptions& general_options, Dataset& dataset, Dataset& mc_dataset, const std::string& sideband_variable);
115 
119  virtual unsigned int getNumberOfFeatures() const override { return m_dataset.getNumberOfFeatures(); }
120 
124  virtual unsigned int getNumberOfSpectators() const override { return m_dataset.getNumberOfSpectators(); }
125 
129  virtual unsigned int getNumberOfEvents() const override { return m_dataset.getNumberOfEvents(); };
130 
135  virtual std::vector<float> getFeature(unsigned int iFeature) override { return m_dataset.getFeature(iFeature); }
136 
141  virtual std::vector<float> getSpectator(unsigned int iSpectator) override { return m_dataset.getSpectator(iSpectator); }
142 
147  virtual void loadEvent(unsigned int event) override;
148 
149  private:
155  };
156 
161  class SPlotDataset : public Dataset {
162 
163  public:
171  SPlotDataset(const GeneralOptions& general_options, Dataset& dataset, const std::vector<float>& weights, float signalFraction);
172 
176  virtual unsigned int getNumberOfFeatures() const override { return m_dataset.getNumberOfFeatures(); }
177 
181  virtual unsigned int getNumberOfSpectators() const override { return m_dataset.getNumberOfSpectators(); }
182 
186  virtual unsigned int getNumberOfEvents() const override { return 2 * m_dataset.getNumberOfEvents(); };
187 
192  virtual void loadEvent(unsigned int event) override;
193 
197  virtual float getSignalFraction() override;
198 
199  private:
201  std::vector<float> m_weights;
203  };
204 
210  std::vector<float> getSPlotWeights(Dataset& dataset, const Binning& binning);
211 
217  std::vector<float> getBoostWeights(Dataset& dataset, const Binning& binning);
218 
225  std::vector<float> getAPlotWeights(Dataset& dataset, const Binning& binning, const std::vector<float>& boost_prediction);
226 
227  }
229 }
230 #endif
Binning of a data distribution. Provides PDF and CDF values of the distribution per bin.
Definition: Binning.h:27
Abstract base class of all Datasets given to the MVA interface. The current event can always be accessed...
Definition: Dataset.h:33
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
virtual unsigned int getNumberOfSpectators() const =0
Returns the number of spectators in this dataset.
virtual std::vector< bool > getSignals()
Returns the isSignal flags of all events.
Definition: Dataset.cc:122
virtual unsigned int getNumberOfFeatures() const =0
Returns the number of features in this dataset.
virtual std::vector< float > getSpectator(unsigned int iSpectator)
Returns all values of one spectator in a std::vector<float>
Definition: Dataset.cc:86
virtual std::vector< float > getTargets()
Returns all targets.
Definition: Dataset.cc:110
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
Definition: Dataset.cc:74
virtual std::vector< float > getWeights()
Returns all weights.
Definition: Dataset.cc:98
General options which are shared by all MVA trainings.
Definition: Options.h:62
Dataset for Reweighting. Wraps a dataset and provides each data-point with a new weight.
Definition: DataDriven.h:29
ReweightingDataset(const GeneralOptions &general_options, Dataset &dataset, const std::vector< float > &weights)
Constructs a new ReweightingDataset.
Definition: DataDriven.cc:25
std::vector< float > m_weights
sPlot weights
Definition: DataDriven.h:97
Dataset & m_dataset
Wrapped dataset.
Definition: DataDriven.h:96
virtual std::vector< float > getTargets() override
Returns all targets.
Definition: DataDriven.h:82
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
Definition: DataDriven.h:53
virtual std::vector< float > getWeights() override
Returns all weights.
Definition: DataDriven.h:70
virtual void loadEvent(unsigned int event) override
Load the event with the given index.
Definition: DataDriven.cc:28
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float>
Definition: DataDriven.h:59
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float>
Definition: DataDriven.h:65
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: DataDriven.h:48
virtual std::vector< bool > getSignals() override
Returns the isSignal flags of all events.
Definition: DataDriven.h:87
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: DataDriven.h:43
Dataset for sPlot. Wraps a dataset and provides each data-point twice, once as signal and once as background...
Definition: DataDriven.h:161
std::vector< float > m_weights
sPlot weights
Definition: DataDriven.h:201
Dataset & m_dataset
Wrapped dataset.
Definition: DataDriven.h:200
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
Definition: DataDriven.h:186
virtual void loadEvent(unsigned int event) override
Load the event with the given index.
Definition: DataDriven.cc:162
SPlotDataset(const GeneralOptions &general_options, Dataset &dataset, const std::vector< float > &weights, float signalFraction)
Constructs a new SPlotDataset.
Definition: DataDriven.cc:153
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: DataDriven.h:181
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: DataDriven.h:176
virtual float getSignalFraction() override
Returns the signal fraction of the whole sample.
Definition: DataDriven.cc:157
float m_signalFraction
Signal fraction.
Definition: DataDriven.h:202
Dataset for Sideband Subtraction. Wraps a dataset and provides each data-point with a new weight.
Definition: DataDriven.h:104
Dataset & m_dataset
Wrapped dataset.
Definition: DataDriven.h:150
double m_negative_signal_weight
the weight for negative signal events
Definition: DataDriven.h:154
double m_signal_weight
the weight for signal events
Definition: DataDriven.h:152
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
Definition: DataDriven.h:129
virtual void loadEvent(unsigned int event) override
Load the event with the given index.
Definition: DataDriven.cc:127
double m_background_weight
the weight for background events
Definition: DataDriven.h:153
SidebandDataset(const GeneralOptions &general_options, Dataset &dataset, Dataset &mc_dataset, const std::string &sideband_variable)
Constructs a new SidebandDataset.
Definition: DataDriven.cc:38
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float>
Definition: DataDriven.h:135
int m_spectator_index
spectator containing the sideband variable
Definition: DataDriven.h:151
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float>
Definition: DataDriven.h:141
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: DataDriven.h:124
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: DataDriven.h:119
Abstract base class for different kinds of events.