Belle II Software  release-05-02-19
Dataset.h
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2016 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Thomas Keck *
7  * Jochen Gemmler *
8  * *
9  * This software is provided "as is" without any warranty. *
10  **************************************************************************/
11 
12 #pragma once
13 #ifndef INCLUDE_GUARD_BELLE2_MVA_DATASET_HEADER
14 #define INCLUDE_GUARD_BELLE2_MVA_DATASET_HEADER
15 
16 #include <mva/interface/Options.h>
17 
18 #include <TFile.h>
19 #include <TChain.h>
20 
21 #include <string>
22 
23 namespace Belle2 {
28  namespace MVA {
29 
 /**
  * Abstract base class of all Datasets given to the MVA interface.
  * The values of the currently loaded event are held in the public members
  * m_input, m_spectators, m_weight, m_target and m_isSignal.
  */
34  class Dataset {
35 
36  public:
 /**
  * Constructs a new dataset given the general options.
  * @param general_options general options shared by all MVA trainings
  */
41  explicit Dataset(const GeneralOptions& general_options);
42 
 /** Virtual default destructor. */
46  virtual ~Dataset() = default;
47 
 /** Copy construction is disabled. */
51  Dataset(const Dataset&) = delete;
52 
 /** Copy assignment is disabled. */
56  Dataset& operator=(const Dataset&) = delete;
57 
 /** Returns the number of features in this dataset. */
61  virtual unsigned int getNumberOfFeatures() const = 0;
62 
 /** Returns the number of spectators in this dataset. */
66  virtual unsigned int getNumberOfSpectators() const = 0;
67 
 /** Returns the number of events in this dataset. */
71  virtual unsigned int getNumberOfEvents() const = 0;
72 
 /**
  * Load the event number iEvent.
  * @param iEvent index of the event to load
  */
77  virtual void loadEvent(unsigned int iEvent) = 0;
78 
 /** Returns the signal fraction of the whole sample. */
82  virtual float getSignalFraction();
83 
 /**
  * Return index of feature with the given name.
  * @param feature name of the feature
  */
88  virtual unsigned int getFeatureIndex(const std::string& feature);
89 
 /**
  * Return index of spectator with the given name.
  * @param spectator name of the spectator
  */
94  virtual unsigned int getSpectatorIndex(const std::string& spectator);
95 
 /**
  * Returns all values of one feature in a std::vector<float>.
  * @param iFeature index of the feature
  */
100  virtual std::vector<float> getFeature(unsigned int iFeature);
101 
 /**
  * Returns all values of one spectator in a std::vector<float>.
  * @param iSpectator index of the spectator
  */
106  virtual std::vector<float> getSpectator(unsigned int iSpectator);
107 
 /** Returns all weights. */
111  virtual std::vector<float> getWeights();
112 
 /** Returns all targets. */
116  virtual std::vector<float> getTargets();
117 
 /** Returns the isSignal values of all events. */
121  virtual std::vector<bool> getSignals();
122 
 // NOTE(review): the reference index lists `GeneralOptions m_general_options`
 // (Dataset.h:123, "GeneralOptions passed to this dataset") but the declaration
 // is absent from this listing - confirm against the real header.
 /** Contains all feature values of the currently loaded event. */
124  std::vector<float> m_input;
 /** Contains all spectator values of the currently loaded event. */
125  std::vector<float> m_spectators;
 /** Contains the weight of the currently loaded event. */
126  float m_weight;
 /** Contains the target value of the currently loaded event. */
127  float m_target;
 /** Defines if the currently loaded event is signal or background. */
128  bool m_isSignal;
129  };
130 
131 
 /** Wraps the data of a single event into a Dataset. */
136  class SingleDataset : public Dataset {
137 
138  public:
 /**
  * Constructs a new SingleDataset.
  * @param general_options general options forwarded to the Dataset base class
  * @param input feature values of the single event
  * @param target target value of the event (defaults to 1.0)
  * @param spectators spectator values of the event (defaults to an empty vector)
  */
146  SingleDataset(const GeneralOptions& general_options, const std::vector<float>& input, float target = 1.0,
147  const std::vector<float>& spectators = std::vector<float>());
148 
 /** Returns the number of features in this dataset, i.e. the size of m_input. */
152  virtual unsigned int getNumberOfFeatures() const override { return m_input.size(); }
153 
 /** Returns the number of spectators in this dataset, i.e. the size of m_spectators. */
157  virtual unsigned int getNumberOfSpectators() const override { return m_spectators.size(); }
158 
 /** Returns the number of events in this dataset, which is always one. */
162  virtual unsigned int getNumberOfEvents() const override { return 1; }
163 
 /** Does nothing in the case of a single dataset, because the only event is already loaded. */
167  virtual void loadEvent(unsigned int) override { };
168 
 /**
  * Returns all values (in this case only one) of one feature in a std::vector<float>.
  * @param iFeature index into m_input; no bounds check is performed here
  */
173  virtual std::vector<float> getFeature(unsigned int iFeature) override { return std::vector<float> {m_input[iFeature]}; }
174 
 /**
  * Returns all values (in this case only one) of one spectator in a std::vector<float>.
  * @param iSpectator index into m_spectators; no bounds check is performed here
  */
179  virtual std::vector<float> getSpectator(unsigned int iSpectator) override { return std::vector<float> {m_spectators[iSpectator]}; }
180 
181  };
182 
 /** Wraps the data of multiple events into a Dataset. */
187  class MultiDataset : public Dataset {
188 
189  public:
 /**
  * Constructs a new MultiDataset.
  * @param general_options general options forwarded to the Dataset base class
  * @param input feature matrix (presumably one inner vector per event - confirm in Dataset.cc)
  * @param spectators spectator matrix
  * @param targets target vector (defaults to empty)
  * @param weights weight vector (defaults to empty)
  */
197  MultiDataset(const GeneralOptions& general_options, const std::vector<std::vector<float>>& input,
198  const std::vector<std::vector<float>>& spectators,
199  const std::vector<float>& targets = {}, const std::vector<float>& weights = {});
200 
 /** Returns the number of features in this dataset, i.e. the size of m_input. */
204  virtual unsigned int getNumberOfFeatures() const override { return m_input.size(); }
205 
 /** Returns the number of spectators in this dataset, i.e. the size of m_spectators. */
209  virtual unsigned int getNumberOfSpectators() const override { return m_spectators.size(); }
210 
 /** Returns the number of events in this dataset, i.e. the size of m_matrix. */
214  virtual unsigned int getNumberOfEvents() const override { return m_matrix.size(); }
215 
 /**
  * Load the event number iEvent (implemented in Dataset.cc).
  * @param iEvent index of the event to load
  */
219  virtual void loadEvent(unsigned int iEvent) override;
220 
221 
222  private:
 /** Feature matrix. */
223  std::vector<std::vector<float>> m_matrix;
 /** Spectator matrix. */
224  std::vector<std::vector<float>> m_spectator_matrix;
 /** Target vector. */
225  std::vector<float> m_targets;
 /** Weight vector. */
226  std::vector<float> m_weights;
228  };
229 
 /** Wraps another Dataset and provides a view to a subset of its features and events. */
234  class SubDataset : public Dataset {
235 
236  public:
 /**
  * Constructs a new SubDataset holding a reference to the wrapped Dataset.
  * @param general_options general options forwarded to the Dataset base class
  * @param events presumably a per-event selection mask - confirm in Dataset.cc
  * @param dataset the wrapped dataset
  */
243  SubDataset(const GeneralOptions& general_options, const std::vector<bool>& events, Dataset& dataset);
244 
 /** Returns the number of features in this dataset, i.e. the size of m_feature_indices. */
248  virtual unsigned int getNumberOfFeatures() const override { return m_feature_indices.size(); }
249 
 /** Returns the number of spectators in this dataset, i.e. the size of m_spectator_indices. */
253  virtual unsigned int getNumberOfSpectators() const override { return m_spectator_indices.size(); }
254 
 /**
  * Returns the size of m_event_indices if m_use_event_indices is set,
  * otherwise the number of events in the wrapped dataset.
  */
258  virtual unsigned int getNumberOfEvents() const override { return m_use_event_indices ? m_event_indices.size() : m_dataset.getNumberOfEvents(); }
259 
 /**
  * Load the event number iEvent from the wrapped dataset.
  * @param iEvent index of the event in this subset
  */
264  virtual void loadEvent(unsigned int iEvent) override;
265 
 /**
  * Returns all values of one feature in a std::vector<float> of the wrapped dataset.
  * @param iFeature index of the feature in this subset
  */
270  virtual std::vector<float> getFeature(unsigned int iFeature) override;
271 
 /**
  * Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
  * @param iSpectator index of the spectator in this subset
  */
276  virtual std::vector<float> getSpectator(unsigned int iSpectator) override;
277 
278  private:
 /** Use only a subset of the wrapped dataset events. */
279  bool m_use_event_indices = false;
 // NOTE(review): the three declarations below lost their member names in this
 // listing. The reference index lists, in order: m_feature_indices (Dataset.h:281),
 // m_spectator_indices (Dataset.h:283), m_event_indices (Dataset.h:285), each a
 // mapping from a position in the subset to the position in the wrapped dataset,
 // followed by `Dataset& m_dataset` (Dataset.h:286), the reference to the wrapped
 // dataset. Confirm against the real header.
280  std::vector<unsigned int>
282  std::vector<unsigned int>
284  std::vector<unsigned int>
288  };
289 
 /** Wraps two other Datasets, one containing signal, the other background events. */
294  class CombinedDataset : public Dataset {
295 
296  public:
 /**
  * Constructs a new CombinedDataset holding a reference to the wrapped Datasets.
  * @param general_options general options forwarded to the Dataset base class
  * @param signal_dataset wrapped dataset containing signal events
  * @param background_dataset wrapped dataset containing background events
  */
303  CombinedDataset(const GeneralOptions& general_options, Dataset& signal_dataset, Dataset& background_dataset);
304 
 /** Returns the number of features, taken from the wrapped signal dataset. */
308  virtual unsigned int getNumberOfFeatures() const override { return m_signal_dataset.getNumberOfFeatures(); }
309 
 /** Returns the number of spectators, taken from the wrapped signal dataset. */
313  virtual unsigned int getNumberOfSpectators() const override { return m_signal_dataset.getNumberOfSpectators(); }
314 
 /** Returns the sum of the number of events in the signal and background datasets. */
318  virtual unsigned int getNumberOfEvents() const override { return m_signal_dataset.getNumberOfEvents() + m_background_dataset.getNumberOfEvents(); }
319 
 /**
  * Load the event number iEvent from the wrapped dataset.
  * @param iEvent index of the event in the combined dataset
  */
324  virtual void loadEvent(unsigned int iEvent) override;
325 
 /**
  * Returns all values of one feature in a std::vector<float> of the wrapped dataset.
  * @param iFeature index of the feature
  */
330  virtual std::vector<float> getFeature(unsigned int iFeature) override;
331 
 /**
  * Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
  * @param iSpectator index of the spectator
  */
336  virtual std::vector<float> getSpectator(unsigned int iSpectator) override;
337 
338  private:
 // NOTE(review): the private member declarations are absent from this listing.
 // The reference index lists `Dataset& m_signal_dataset` (Dataset.h:339) and
 // `Dataset& m_background_dataset` (Dataset.h:340) - confirm against the real header.
342  };
343 
344 
 /** Provides a dataset from a ROOT file. */
349  class ROOTDataset : public Dataset {
350 
351  public:
 /**
  * Creates a new ROOTDataset.
  * @param _general_options general options forwarded to the Dataset base class
  */
356  explicit ROOTDataset(const GeneralOptions& _general_options);
357 
 /** Returns the number of features in this dataset, i.e. the size of m_input. */
361  virtual unsigned int getNumberOfFeatures() const override { return m_input.size(); }
362 
 /** Returns the number of spectators in this dataset, i.e. the size of m_spectators. */
366  virtual unsigned int getNumberOfSpectators() const override { return m_spectators.size(); }
367 
 /**
  * Returns the number of events in this dataset: the entry count of m_tree when
  * m_max_events is 0 (meaning "all"), otherwise m_max_events.
  * NOTE(review): no clamp of m_max_events to the entry count is visible here -
  * confirm whether a value larger than the tree is handled elsewhere.
  */
371  virtual unsigned int getNumberOfEvents() const override
372  {
373  return (m_general_options.m_max_events == 0) ? m_tree->GetEntries() : m_general_options.m_max_events;
374  }
375 
 /**
  * Load the given event number from the TTree.
  * @param event index of the event to load
  */
380  virtual void loadEvent(unsigned int event) override;
381 
 /**
  * Returns all values of one feature in a std::vector<float>.
  * @param iFeature index of the feature
  */
386  virtual std::vector<float> getFeature(unsigned int iFeature) override;
387 
 /** Returns all values of the weights in a std::vector<float>. */
391  virtual std::vector<float> getWeights() override;
392 
 /**
  * Returns all values of one spectator in a std::vector<float>.
  * @param iSpectator index of the spectator
  */
397  virtual std::vector<float> getSpectator(unsigned int iSpectator) override;
398 
 /**
  * Returns all values for a specified variableType and branchName.
  * @param variableType type of the variable (exact accepted values are defined in Dataset.cc)
  * @param branchName name of the branch to read
  * @param memberVariableTarget member variable used as the read target
  */
408  template<class T>
409  std::vector<float> getVectorFromTTree(std::string& variableType, std::string& branchName, T& memberVariableTarget);
410 
 /** Tries to infer the data-type of a root file and sets m_isDoubleInputType. */
414  void setRootInputType();
415 
 /**
  * Sets the branch address for a scalar variable to a given target.
  * @param variableType type of the variable
  * @param variableName name of the variable
  * @param variableTarget target the branch address is set to
  */
423  template<class T>
424  void setScalarVariableAddress(std::string& variableType, std::string& variableName, T& variableTarget);
425 
 /**
  * Sets the branch address for a vector variable to a given target.
  * @param variableType type of the variables
  * @param variableName names of the variables
  * @param variableTargets targets the branch addresses are set to
  */
433  template<class T>
434  void setVectorVariableAddress(std::string& variableType, std::vector<std::string>& variableName,
435  T& variableTargets);
436 
 /** Virtual destructor. */
440  virtual ~ROOTDataset();
441 
442 
443  private:
 /** Sets the branch addresses of all features, weight and target again. */
447  void setBranchAddresses();
448 
 /** Checks if the given branch name exists in the TTree. */
454  bool checkForBranch(TTree*, const std::string&) const;
455 
456  protected:
 /** Pointer to the TChain containing the data. */
457  TChain* m_tree = nullptr;
 /** Defines the expected datatype in the ROOT file. */
458  bool m_isDoubleInputType = true;
 /** Contains all feature values of the currently loaded event. */
459  std::vector<double> m_input_double;
 /** Contains all spectator values of the currently loaded event. */
460  std::vector<double> m_spectators_double;
 // NOTE(review): the reference index also lists `double m_weight_double`
 // (Dataset.h:461) and `double m_target_double` (Dataset.h:462), which are
 // absent from this listing - confirm against the real header.
463  };
464 
465  }
467 }
468 #endif
Belle2::MVA::CombinedDataset::loadEvent
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent from the wrapped dataset.
Definition: Dataset.cc:289
Belle2::MVA::ROOTDataset::m_tree
TChain * m_tree
Pointer to the TChain containing the data.
Definition: Dataset.h:457
Belle2::MVA::ROOTDataset::getFeature
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float>
Definition: Dataset.cc:430
Belle2::MVA::MultiDataset
Wraps the data of multiple events into a Dataset.
Definition: Dataset.h:187
Belle2::MVA::SingleDataset::SingleDataset
SingleDataset(const GeneralOptions &general_options, const std::vector< float > &input, float target=1.0, const std::vector< float > &spectators=std::vector< float >())
Constructs a new SingleDataset.
Definition: Dataset.cc:147
Belle2::MVA::Dataset::m_general_options
GeneralOptions m_general_options
GeneralOptions passed to this dataset.
Definition: Dataset.h:123
Belle2::MVA::SubDataset::m_use_event_indices
bool m_use_event_indices
Use only a subset of the wrapped dataset events.
Definition: Dataset.h:279
Belle2::MVA::SingleDataset::getSpectator
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values (in this case only one) of one spectator in a std::vector<float>
Definition: Dataset.h:179
Belle2::MVA::SingleDataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset, which is always one.
Definition: Dataset.h:162
Belle2::MVA::Dataset
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition: Dataset.h:34
Belle2::MVA::Dataset::~Dataset
virtual ~Dataset()=default
Virtual default destructor.
Belle2::MVA::GeneralOptions::m_max_events
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Definition: Options.h:93
Belle2::MVA::SubDataset::getFeature
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float> of the wrapped dataset.
Definition: Dataset.cc:257
Belle2::MVA::Dataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const =0
Returns the number of features in this dataset.
Belle2::MVA::Dataset::loadEvent
virtual void loadEvent(unsigned int iEvent)=0
Load the event number iEvent.
Belle2::MVA::CombinedDataset::m_background_dataset
Dataset & m_background_dataset
Reference to the wrapped dataset containing background events.
Definition: Dataset.h:340
Belle2::MVA::Dataset::getFeature
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
Definition: Dataset.cc:86
Belle2::MVA::SubDataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const override
Returns the number of selected events if an event subset is used, otherwise the number of events in the wrapped dataset.
Definition: Dataset.h:258
Belle2::MVA::ROOTDataset::m_isDoubleInputType
bool m_isDoubleInputType
Defines the expected datatype in the ROOT file.
Definition: Dataset.h:458
Belle2::MVA::MultiDataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
Definition: Dataset.h:214
Belle2::MVA::Dataset::getSpectator
virtual std::vector< float > getSpectator(unsigned int iSpectator)
Returns all values of one spectator in a std::vector<float>
Definition: Dataset.cc:98
Belle2::MVA::ROOTDataset::ROOTDataset
ROOTDataset(const GeneralOptions &_general_options)
Creates a new ROOTDataset.
Definition: Dataset.cc:328
Belle2::MVA::SubDataset::m_spectator_indices
std::vector< unsigned int > m_spectator_indices
Mapping from the position of a spectator in the given subset to its position in the wrapped dataset.
Definition: Dataset.h:283
Belle2::MVA::ROOTDataset::m_spectators_double
std::vector< double > m_spectators_double
Contains all spectator values of the currently loaded event.
Definition: Dataset.h:460
Belle2::MVA::Dataset::getWeights
virtual std::vector< float > getWeights()
Returns all weights.
Definition: Dataset.cc:110
Belle2::MVA::ROOTDataset::setVectorVariableAddress
void setVectorVariableAddress(std::string &variableType, std::vector< std::string > &variableName, T &variableTargets)
sets the branch address for a vector variable to a given target
Definition: Dataset.cc:532
Belle2::MVA::ROOTDataset::loadEvent
virtual void loadEvent(unsigned int event) override
Load the event number iEvent from the TTree.
Definition: Dataset.cc:387
Belle2::MVA::MultiDataset::m_spectator_matrix
std::vector< std::vector< float > > m_spectator_matrix
Spectator matrix.
Definition: Dataset.h:224
Belle2::MVA::CombinedDataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset, which is the number of features of the wrapped signal dataset.
Definition: Dataset.h:308
Belle2::MVA::Dataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const =0
Returns the number of spectators in this dataset.
Belle2::MVA::ROOTDataset::getWeights
virtual std::vector< float > getWeights() override
Returns all values of the weights in a std::vector<float>
Definition: Dataset.cc:404
Belle2::MVA::SubDataset::SubDataset
SubDataset(const GeneralOptions &general_options, const std::vector< bool > &events, Dataset &dataset)
Constructs a new SubDataset holding a reference to the wrapped Dataset.
Definition: Dataset.cc:198
Belle2::MVA::MultiDataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: Dataset.h:209
Belle2::MVA::SubDataset::m_event_indices
std::vector< unsigned int > m_event_indices
Mapping from the position of an event in the given subset to its position in the wrapped dataset.
Definition: Dataset.h:285
Belle2::MVA::MultiDataset::m_targets
std::vector< float > m_targets
target vector
Definition: Dataset.h:225
Belle2::MVA::SubDataset::m_feature_indices
std::vector< unsigned int > m_feature_indices
Mapping from the position of a feature in the given subset to its position in the wrapped dataset.
Definition: Dataset.h:281
Belle2::MVA::Dataset::getSignals
virtual std::vector< bool > getSignals()
Returns the isSignal values of all events.
Definition: Dataset.cc:134
Belle2::MVA::SingleDataset::loadEvent
virtual void loadEvent(unsigned int) override
Does nothing in the case of a single dataset, because the only event is already loaded.
Definition: Dataset.h:167
Belle2::MVA::ROOTDataset::setScalarVariableAddress
void setScalarVariableAddress(std::string &variableType, std::string &variableName, T &variableTarget)
sets the branch address for a scalar variable to a given target
Definition: Dataset.cc:510
Belle2::MVA::CombinedDataset::CombinedDataset
CombinedDataset(const GeneralOptions &general_options, Dataset &signal_dataset, Dataset &background_dataset)
Constructs a new CombinedDataset holding a reference to the wrapped Datasets.
Definition: Dataset.cc:285
Belle2::MVA::ROOTDataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
Definition: Dataset.h:371
Belle2::MVA::ROOTDataset::m_weight_double
double m_weight_double
Contains the weight of the currently loaded event.
Definition: Dataset.h:461
Belle2::MVA::Dataset::operator=
Dataset & operator=(const Dataset &)=delete
Specify no assignment operator.
Belle2::MVA::SubDataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset, so the size of the given subset of the spectators.
Definition: Dataset.h:253
Belle2::MVA::ROOTDataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: Dataset.h:361
Belle2::MVA::MultiDataset::MultiDataset
MultiDataset(const GeneralOptions &general_options, const std::vector< std::vector< float >> &input, const std::vector< std::vector< float >> &spectators, const std::vector< float > &targets={}, const std::vector< float > &weights={})
Constructs a new MultiDataset.
Definition: Dataset.cc:157
Belle2::MVA::CombinedDataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset, which is the number of spectators of the wrapped signal dataset.
Definition: Dataset.h:313
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MVA::SubDataset
Wraps another Dataset and provides a view to a subset of its features and events.
Definition: Dataset.h:234
Belle2::MVA::Dataset::m_input
std::vector< float > m_input
Contains all feature values of the currently loaded event.
Definition: Dataset.h:124
Belle2::MVA::CombinedDataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const override
Returns the total number of events in the wrapped signal and background datasets.
Definition: Dataset.h:318
Belle2::MVA::Dataset::m_spectators
std::vector< float > m_spectators
Contains all spectator values of the currently loaded event.
Definition: Dataset.h:125
Belle2::MVA::SubDataset::m_dataset
Dataset & m_dataset
Reference to the wrapped dataset.
Definition: Dataset.h:286
Belle2::MVA::GeneralOptions
General options which are shared by all MVA trainings.
Definition: Options.h:64
Belle2::MVA::MultiDataset::loadEvent
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent.
Definition: Dataset.cc:180
Belle2::MVA::CombinedDataset::getSpectator
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
Definition: Dataset.cc:318
Belle2::MVA::CombinedDataset::m_signal_dataset
Dataset & m_signal_dataset
Reference to the wrapped dataset containing signal events.
Definition: Dataset.h:339
Belle2::MVA::SingleDataset
Wraps the data of a single event into a Dataset.
Definition: Dataset.h:136
Belle2::MVA::MultiDataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: Dataset.h:204
Belle2::MVA::SingleDataset::getFeature
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values (in this case only one) of one feature in a std::vector<float>
Definition: Dataset.h:173
Belle2::MVA::SubDataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset, so the size of the given subset of the variables.
Definition: Dataset.h:248
Belle2::MVA::ROOTDataset::checkForBranch
bool checkForBranch(TTree *, const std::string &) const
Checks if the given branchname exists in the TTree.
Definition: Dataset.cc:502
Belle2::MVA::ROOTDataset::~ROOTDataset
virtual ~ROOTDataset()
Virtual destructor.
Definition: Dataset.cc:462
Belle2::MVA::MultiDataset::m_weights
std::vector< float > m_weights
weight vector
Definition: Dataset.h:226
Belle2::MVA::SingleDataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: Dataset.h:157
Belle2::MVA::ROOTDataset::getSpectator
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float>
Definition: Dataset.cc:446
Belle2::MVA::ROOTDataset
Provides a dataset from a ROOT file This is the usually used dataset providing training data to the m...
Definition: Dataset.h:349
Belle2::MVA::Dataset::getTargets
virtual std::vector< float > getTargets()
Returns all targets.
Definition: Dataset.cc:122
Belle2::MVA::Dataset::m_weight
float m_weight
Contains the weight of the currently loaded event.
Definition: Dataset.h:126
Belle2::MVA::Dataset::getSpectatorIndex
virtual unsigned int getSpectatorIndex(const std::string &spectator)
Return index of spectator with the given name.
Definition: Dataset.cc:74
Belle2::MVA::Dataset::m_isSignal
bool m_isSignal
Defines if the currently loaded event is signal or background.
Definition: Dataset.h:128
Belle2::MVA::MultiDataset::m_matrix
std::vector< std::vector< float > > m_matrix
Feature matrix.
Definition: Dataset.h:223
Belle2::MVA::Dataset::Dataset
Dataset(const GeneralOptions &general_options)
Constructs a new dataset given the general options.
Definition: Dataset.cc:38
Belle2::MVA::ROOTDataset::setBranchAddresses
void setBranchAddresses()
Sets the branch addresses of all features, weight and target again.
Definition: Dataset.cc:539
Belle2::MVA::SubDataset::getSpectator
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
Definition: Dataset.cc:271
Belle2::MVA::CombinedDataset
Wraps two other Datasets, one containing signal, the other background events Used by the reweighting ...
Definition: Dataset.h:294
Belle2::MVA::SingleDataset::getNumberOfFeatures
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Definition: Dataset.h:152
Belle2::MVA::ROOTDataset::getNumberOfSpectators
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
Definition: Dataset.h:366
Belle2::MVA::ROOTDataset::getVectorFromTTree
std::vector< float > getVectorFromTTree(std::string &variableType, std::string &branchName, T &memberVariableTarget)
Returns all values for a specified variableType and branchName.
Definition: Dataset.cc:469
Belle2::MVA::SubDataset::loadEvent
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent from the wrapped dataset.
Definition: Dataset.cc:237
Belle2::MVA::ROOTDataset::m_target_double
double m_target_double
Contains the target value of the currently loaded event.
Definition: Dataset.h:462
Belle2::MVA::Dataset::getNumberOfEvents
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
Belle2::MVA::Dataset::m_target
float m_target
Contains the target value of the currently loaded event.
Definition: Dataset.h:127
Belle2::MVA::CombinedDataset::getFeature
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float> of the wrapped dataset.
Definition: Dataset.cc:308
Belle2::MVA::ROOTDataset::m_input_double
std::vector< double > m_input_double
Contains all feature values of the currently loaded event.
Definition: Dataset.h:459
Belle2::MVA::Dataset::getFeatureIndex
virtual unsigned int getFeatureIndex(const std::string &feature)
Return index of feature with the given name.
Definition: Dataset.cc:62
Belle2::MVA::ROOTDataset::setRootInputType
void setRootInputType()
Tries to infer the data-type of a root file and sets m_isDoubleInputType.
Definition: Dataset.cc:590
Belle2::MVA::Dataset::getSignalFraction
virtual float getSignalFraction()
Returns the signal fraction of the whole sample.
Definition: Dataset.cc:47