10 #ifndef INCLUDE_GUARD_BELLE2_MVA_DATASET_HEADER
11 #define INCLUDE_GUARD_BELLE2_MVA_DATASET_HEADER
13 #include <mva/interface/Options.h>
15 #include <analysis/VariableManager/Manager.h>
/**
 * Returns all values of one feature in a std::vector<float>.
 * Declared virtual and without `override`, so derived classes can replace it.
 * @param iFeature selects the feature to return (presumably its position among the features -- confirm)
 * @return the values of the selected feature
 */
99 virtual std::vector<float>
getFeature(
unsigned int iFeature);
/**
 * Returns all values of one spectator in a std::vector<float>.
 * Declared virtual and without `override`, so derived classes can replace it.
 * @param iSpectator selects the spectator to return (presumably its position among the spectators -- confirm)
 * @return the values of the selected spectator
 */
105 virtual std::vector<float>
getSpectator(
unsigned int iSpectator);
146 const std::vector<float>& spectators = std::vector<float>());
/**
 * Returns the value of one feature of the currently loaded event,
 * wrapped in a std::vector<float> holding exactly one element.
 * @param iFeature position of the feature inside m_input
 * @return one-element vector containing m_input[iFeature]
 */
172 virtual std::vector<float> getFeature(unsigned int iFeature) override
{
  // Only one value per feature is available here, so the result always has size one.
  const float featureValue = m_input[iFeature];
  return std::vector<float>(1, featureValue);
}
/**
 * Returns the value of one spectator of the currently loaded event,
 * wrapped in a std::vector<float> holding exactly one element.
 * @param iSpectator position of the spectator inside m_spectators
 * @return one-element vector containing m_spectators[iSpectator]
 */
178 virtual std::vector<float> getSpectator(unsigned int iSpectator) override
{
  // Only one value per spectator is available here, so the result always has size one.
  const float spectatorValue = m_spectators[iSpectator];
  return std::vector<float>(1, spectatorValue);
}
198 const std::vector<std::vector<float>>& spectators,
199 const std::vector<float>& targets = {},
const std::vector<float>& weights = {});
/**
 * Loads the event number iEvent; overrides the base-class loadEvent.
 * NOTE(review): this declaration follows the MultiDataset constructor parameters in the
 * listing, so it is presumably MultiDataset::loadEvent -- confirm.
 * @param iEvent number of the event which should be loaded
 */
219 virtual void loadEvent(
unsigned int iEvent)
override;
/**
 * Loads the event number iEvent; overrides the base-class loadEvent.
 * NOTE(review): the std::vector<unsigned int> members declared shortly after this group
 * suggest the owning class is SubDataset -- confirm.
 * @param iEvent number of the event which should be loaded
 */
264 virtual void loadEvent(
unsigned int iEvent)
override;
/**
 * Returns all values of one feature in a std::vector<float>; overrides the base-class getFeature.
 * NOTE(review): presumably SubDataset::getFeature (owning class is not visible here) -- confirm.
 * @param iFeature selects the feature to return
 * @return the values of the selected feature
 */
270 virtual std::vector<float>
getFeature(
unsigned int iFeature)
override;
/**
 * Returns all values of one spectator in a std::vector<float>; overrides the base-class getSpectator.
 * NOTE(review): presumably SubDataset::getSpectator (owning class is not visible here) -- confirm.
 * @param iSpectator selects the spectator to return
 * @return the values of the selected spectator
 */
276 virtual std::vector<float>
getSpectator(
unsigned int iSpectator)
override;
280 std::vector<unsigned int>
282 std::vector<unsigned int>
284 std::vector<unsigned int>
/**
 * Loads the event number iEvent; overrides the base-class loadEvent.
 * NOTE(review): owning class is not visible here; by position in the listing this is
 * presumably CombinedDataset::loadEvent -- confirm.
 * @param iEvent number of the event which should be loaded
 */
324 virtual void loadEvent(
unsigned int iEvent)
override;
/**
 * Returns all values of one feature in a std::vector<float>; overrides the base-class getFeature.
 * NOTE(review): owning class is not visible here; presumably CombinedDataset -- confirm.
 * @param iFeature selects the feature to return
 * @return the values of the selected feature
 */
330 virtual std::vector<float>
getFeature(
unsigned int iFeature)
override;
/**
 * Returns all values of one spectator in a std::vector<float>; overrides the base-class getSpectator.
 * NOTE(review): owning class is not visible here; presumably CombinedDataset -- confirm.
 * @param iSpectator selects the spectator to return
 * @return the values of the selected spectator
 */
336 virtual std::vector<float>
getSpectator(
unsigned int iSpectator)
override;
/**
 * Loads the given event; overrides the base-class loadEvent.
 * NOTE(review): the parameter name `event` matches the ROOTDataset::loadEvent signature
 * in the listing, so this is presumably the ROOTDataset version reading from the TTree -- confirm.
 * @param event number of the event which should be loaded
 */
380 virtual void loadEvent(
unsigned int event)
override;
/**
 * Returns all values of one feature in a std::vector<float>; overrides the base-class getFeature.
 * NOTE(review): presumably ROOTDataset::getFeature (owning class is not visible here) -- confirm.
 * @param iFeature selects the feature to return
 * @return the values of the selected feature
 */
386 virtual std::vector<float>
getFeature(
unsigned int iFeature)
override;
/**
 * Returns all values of the weights in a std::vector<float>; overrides the base-class getWeights.
 * NOTE(review): presumably ROOTDataset::getWeights (owning class is not visible here) -- confirm.
 * @return the weights
 */
391 virtual std::vector<float>
getWeights()
override;
/**
 * Returns all values of one spectator in a std::vector<float>; overrides the base-class getSpectator.
 * NOTE(review): presumably ROOTDataset::getSpectator (owning class is not visible here) -- confirm.
 * @param iSpectator selects the spectator to return
 * @return the values of the selected spectator
 */
397 virtual std::vector<float>
getSpectator(
unsigned int iSpectator)
override;
410 std::vector<RootDatasetVarVariant>
428 std::vector<float>
getVectorFromTTree(
const std::string& variableType,
const std::string& branchName, T& memberVariableTarget);
483 std::vector<RootDatasetVarVariant>& varVariantTargets);
Wraps two other Datasets, one containing signal events, the other background events. Used by the reweighting ...
CombinedDataset(const GeneralOptions &general_options, Dataset &signal_dataset, Dataset &background_dataset)
Constructs a new CombinedDataset holding a reference to the wrapped Datasets.
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in the wrapped dataset.
Dataset & m_background_dataset
Reference to the wrapped dataset containing background events.
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float> of the wrapped dataset.
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent from the wrapped dataset.
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset, so the size of the given subset of the spectators.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset, so the size of the given subset of the variables.
Dataset & m_signal_dataset
Reference to the wrapped dataset containing signal events.
Abstract base class of all Datasets given to the MVA interface. The current event can always be access...
virtual unsigned int getNumberOfEvents() const =0
Returns the number of events in this dataset.
virtual unsigned int getNumberOfSpectators() const =0
Returns the number of spectators in this dataset.
Dataset(const Dataset &)=delete
Specify no copy constructor.
virtual std::vector< bool > getSignals()
Returns the isSignal flags of all events.
virtual unsigned int getNumberOfFeatures() const =0
Returns the number of features in this dataset.
Dataset & operator=(const Dataset &)=delete
Specify no assignment operator.
virtual unsigned int getFeatureIndex(const std::string &feature)
Return index of feature with the given name.
virtual std::vector< float > getSpectator(unsigned int iSpectator)
Returns all values of one spectator in a std::vector<float>
std::vector< float > m_spectators
Contains all spectators values of the currently loaded event.
virtual std::vector< float > getTargets()
Returns all targets.
virtual void loadEvent(unsigned int iEvent)=0
Load the event number iEvent.
GeneralOptions m_general_options
GeneralOptions passed to this dataset.
std::vector< float > m_input
Contains all feature values of the currently loaded event.
Dataset(const GeneralOptions &general_options)
Constructs a new dataset given the general options.
virtual std::vector< float > getFeature(unsigned int iFeature)
Returns all values of one feature in a std::vector<float>
virtual std::vector< float > getWeights()
Returns all weights.
virtual float getSignalFraction()
Returns the signal fraction of the whole sample.
bool m_isSignal
Defines if the currently loaded event is signal or background.
float m_weight
Contains the weight of the currently loaded event.
virtual unsigned int getSpectatorIndex(const std::string &spectator)
Return index of spectator with the given name.
float m_target
Contains the target value of the currently loaded event.
virtual ~Dataset()=default
Virtual default destructor.
General options which are shared by all MVA trainings.
unsigned int m_max_events
Maximum number of events to process, 0 means all.
Wraps the data of multiple events into a Dataset.
std::vector< float > m_weights
weight vector
std::vector< std::vector< float > > m_matrix
Feature matrix.
std::vector< std::vector< float > > m_spectator_matrix
Spectator matrix.
MultiDataset(const GeneralOptions &general_options, const std::vector< std::vector< float >> &input, const std::vector< std::vector< float >> &spectators, const std::vector< float > &targets={}, const std::vector< float > &weights={})
Constructs a new MultiDataset.
std::vector< float > m_targets
target vector
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent.
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
Provides a dataset from a ROOT file. This is the usually used dataset providing training data to the m...
void setScalarVariableAddress(const std::string &variableType, const std::string &variableName, T &variableTarget)
sets the branch address for a scalar variable to a given target
void setBranchAddresses()
Sets the branch addresses of all features, weight and target again.
void setTargetRootInputType()
Determines the data type of the target variable and sets it to m_target_data_type.
void setScalarVariableAddressVariant(const std::string &variableType, const std::string &variableName, RootDatasetVarVariant &variableTarget)
sets the branch address for a scalar variable to a given target
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset.
void initialiseVarVariantForBranch(const std::string, RootDatasetVarVariant &)
Infers the type (double,float,int,bool) from the TTree and initialises the VarVariant with the correc...
TChain * m_tree
Pointer to the TChain containing the data.
virtual void loadEvent(unsigned int event) override
Load the given event from the TTree.
void setVectorVariableAddressVariant(const std::string &variableType, const std::vector< std::string > &variableName, std::vector< RootDatasetVarVariant > &varVariantTargets)
sets the branch address for a vector of VarVariant to a given target
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float>
std::vector< float > getVectorFromTTree(const std::string &variableType, const std::string &branchName, T &memberVariableTarget)
Returns all values for a specified variableType and branchName.
void initialiseVarVariantType(const std::string, RootDatasetVarVariant &)
Initialises the VarVariant.
std::vector< RootDatasetVarVariant > m_spectators_variant
Contains all spectators values of the currently loaded event.
RootDatasetVarVariant m_target_variant
Contains the target value of the currently loaded event.
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float>
virtual std::vector< float > getWeights() override
Returns all values of the weights in a std::vector<float>
std::vector< RootDatasetVarVariant > m_input_variant
Contains all feature values of the currently loaded event.
ROOTDataset(const GeneralOptions &_general_options)
Creates a new ROOTDataset.
void setRootInputType()
Tries to infer the data-type of the spectator and feature variables in a root file.
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
bool checkForBranch(TTree *, const std::string &) const
Checks if the given branchname exists in the TTree.
void setVectorVariableAddress(const std::string &variableType, const std::vector< std::string > &variableName, T &variableTargets)
sets the branch address for a vector variable to a given target
virtual ~ROOTDataset()
Virtual destructor.
float castVarVariantToFloat(RootDatasetVarVariant &) const
Casts a VarVariant which can contain <double,int,bool,float> to float.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
std::variant< double, float, int, bool > RootDatasetVarVariant
Typedef for variable types supported by the mva ROOTDataset, can be one of double,...
std::vector< float > getVectorFromTTreeVariant(const std::string &variableType, const std::string &branchName, RootDatasetVarVariant &memberVariableTarget)
Returns all values for a specified variableType and branchName.
RootDatasetVarVariant m_weight_variant
Contains the weight of the currently loaded event.
Wraps the data of a single event into a Dataset.
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in this dataset which is always one.
SingleDataset(const GeneralOptions &general_options, const std::vector< float > &input, float target=1.0, const std::vector< float > &spectators=std::vector< float >())
Constructs a new SingleDataset.
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values (in this case only one) of one feature in a std::vector<float>
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values (in this case only one) of one spectator in a std::vector<float>
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset.
virtual void loadEvent(unsigned int) override
Does nothing in the case of a single dataset, because the only event is already loaded.
Wraps another Dataset and provides a view to a subset of its features and events.
Dataset & m_dataset
Reference to the wrapped dataset.
SubDataset(const GeneralOptions &general_options, const std::vector< bool > &events, Dataset &dataset)
Constructs a new SubDataset holding a reference to the wrapped Dataset.
virtual unsigned int getNumberOfEvents() const override
Returns the number of events in the wrapped dataset.
virtual std::vector< float > getSpectator(unsigned int iSpectator) override
Returns all values of one spectator in a std::vector<float> of the wrapped dataset.
std::vector< unsigned int > m_feature_indices
Mapping from the position of a feature in the given subset to its position in the wrapped dataset.
virtual std::vector< float > getFeature(unsigned int iFeature) override
Returns all values of one feature in a std::vector<float> of the wrapped dataset.
std::vector< unsigned int > m_spectator_indices
Mapping from the position of a spectator in the given subset to its position in the wrapped dataset.
virtual void loadEvent(unsigned int iEvent) override
Load the event number iEvent from the wrapped dataset.
virtual unsigned int getNumberOfSpectators() const override
Returns the number of spectators in this dataset, so the size of the given subset of the spectators.
std::vector< unsigned int > m_event_indices
Mapping from the position of an event in the given subset to its position in the wrapped dataset.
bool m_use_event_indices
Use only a subset of the wrapped dataset events.
virtual unsigned int getNumberOfFeatures() const override
Returns the number of features in this dataset, so the size of the given subset of the variables.
Abstract base class for different kinds of events.