Belle II Software  light-2212-foldex
VariablesToEventBasedTreeModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <analysis/modules/VariablesToEventBasedTree/VariablesToEventBasedTreeModule.h>
10 
11 #include <analysis/dataobjects/ParticleList.h>
12 #include <analysis/dataobjects/StringWrapper.h>
13 #include <analysis/VariableManager/Manager.h>
14 #include <analysis/VariableManager/Utility.h>
15 #include <framework/logging/Logger.h>
16 #include <framework/pcore/ProcHandler.h>
17 #include <framework/utilities/MakeROOTCompatible.h>
18 #include <framework/utilities/RootFileCreationManager.h>
19 #include <framework/core/ModuleParam.templateDetails.h>
20 
21 #include <cmath>
22 
23 using namespace std;
24 using namespace Belle2;
25 
26 // Register module in the framework
// REG_MODULE expects the module name without the 'Module' suffix.
27 REG_MODULE(VariablesToEventBasedTree);
28 
29 
30 VariablesToEventBasedTreeModule::VariablesToEventBasedTreeModule() :
31  Module(), m_tree("", DataStore::c_Persistent)
32 {
33  //Set module properties
34  setDescription("Calculate variables specified by the user for a given ParticleList and save them into a TTree. The Tree is event-based, meaning that the variables of each candidate for each event are saved in an array of a branch of the Tree.");
36 
37  vector<string> emptylist;
38  addParam("particleList", m_particleList,
39  "Name of particle list with reconstructed particles. An empty ParticleList is not supported. Use the VariablesToNtupleModule for this use-case",
40  std::string(""));
41  addParam("variables", m_variables,
42  "List of variables (or collections) to save for each candidate. Variables are taken from Variable::Manager, and are identical to those available to e.g. ParticleSelector.",
43  emptylist);
44 
45  addParam("event_variables", m_event_variables,
46  "List of variables (or collections) to save for each event. Variables are taken from Variable::Manager, and are identical to those available to e.g. ParticleSelector. Only event-based variables are allowed here.",
47  emptylist);
48 
49  addParam("fileName", m_fileName, "Name of ROOT file for output.", string("VariablesToEventBasedTree.root"));
50  addParam("treeName", m_treeName, "Name of the NTuple in the saved file.", string("tree"));
51  addParam("maxCandidates", m_maxCandidates, "The maximum number of candidates in the ParticleList per entry of the Tree.", 100u);
52 
53  std::tuple<std::string, std::map<int, unsigned int>> default_sampling{"", {}};
54  addParam("sampling", m_sampling,
55  "Tuple of variable name and a map of integer values and inverse sampling rate. E.g. (signal, {1: 0, 0:10}) selects all signal events and every 10th background event. Variable must be event-based.",
56  default_sampling);
57 }
58 
60 {
61  m_eventMetaData.isRequired();
63 
64  // See if there is already a file in which case add a new tree to it ...
65  // otherwise create a new file (all handled by framework)
67  if (!m_file) {
68  B2ERROR("Could not create file \"" << m_fileName <<
69  "\". Please set a valid root output file name (\"fileName\" module parameter).");
70  return;
71  }
72 
73  m_file->cd();
74 
75  // check if TTree with that name already exists
76  if (m_file->Get(m_treeName.c_str())) {
77  B2FATAL("Tree with the name " << m_treeName << " already exists in the file " << m_fileName);
78  return;
79  }
80 
82  // remove duplicates from list of variables but keep the previous order
83  unordered_set<string> seen;
84  auto newEnd = remove_if(m_variables.begin(), m_variables.end(), [&seen](const string & varStr) {
85  if (seen.find(varStr) != std::end(seen)) return true;
86  seen.insert(varStr);
87  return false;
88  });
89  m_variables.erase(newEnd, m_variables.end());
90 
92  // remove duplicates from list of variables but keep the previous order
93  unordered_set<string> seenEventVariables;
94  auto eventVariablesEnd = remove_if(m_event_variables.begin(),
95  m_event_variables.end(), [&seenEventVariables](const string & varStr) {
96  if (seenEventVariables.find(varStr) != std::end(seenEventVariables)) return true;
97  seenEventVariables.insert(varStr);
98  return false;
99  });
100  m_event_variables.erase(eventVariablesEnd, m_event_variables.end());
101 
102  m_tree.registerInDataStore(m_fileName + m_treeName, DataStore::c_DontWriteOut);
103  m_tree.construct(m_treeName.c_str(), "");
104 
105  m_valuesDouble.resize(m_variables.size());
106  m_valuesInt.resize(m_variables.size());
108  m_event_valuesInt.resize(m_event_variables.size());
109 
110  m_tree->get().Branch("__event__", &m_event, "__event__/i");
111  m_tree->get().Branch("__run__", &m_run, "__run__/I");
112  m_tree->get().Branch("__experiment__", &m_experiment, "__experiment__/I");
113  m_tree->get().Branch("__production__", &m_production, "__production__/I");
114  m_tree->get().Branch("__ncandidates__", &m_ncandidates, "__ncandidates__/I");
115  m_tree->get().Branch("__weight__", &m_weight, "__weight__/F");
116 
117  if (m_stringWrapper.isOptional("MCDecayString"))
118  m_tree->get().Branch("__MCDecayString__", &m_MCDecayString);
119 
120  for (unsigned int i = 0; i < m_event_variables.size(); ++i) {
121  auto varStr = m_event_variables[i];
122 
123  if (Variable::isCounterVariable(varStr)) {
124  B2WARNING("The counter '" << varStr
125  << "' is handled automatically by VariablesToEventBasedTree, you don't need to add it.");
126  continue;
127  }
128 
129  //also collection function pointers
131  if (!var) {
132  B2ERROR("Variable '" << varStr << "' is not available in Variable::Manager!");
133  } else {
134  if (var->variabletype == Variable::Manager::VariableDataType::c_double) {
135  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_event_valuesDouble[i],
136  (MakeROOTCompatible::makeROOTCompatible(varStr) + "/D").c_str());
137  } else if (var->variabletype == Variable::Manager::VariableDataType::c_int) {
138  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_event_valuesInt[i],
139  (MakeROOTCompatible::makeROOTCompatible(varStr) + "/I").c_str());
140  } else if (var->variabletype == Variable::Manager::VariableDataType::c_bool) {
141  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_event_valuesInt[i],
142  (MakeROOTCompatible::makeROOTCompatible(varStr) + "/O").c_str());
143  }
144  m_event_functions.push_back(var->function);
145  }
146  }
147 
148  for (unsigned int i = 0; i < m_variables.size(); ++i) {
149  auto varStr = m_variables[i];
150  m_valuesDouble[i].resize(m_maxCandidates);
151  m_valuesInt[i].resize(m_maxCandidates);
152 
153  //also collection function pointers
155  if (!var) {
156  B2ERROR("Variable '" << varStr << "' is not available in Variable::Manager!");
157  } else {
158  if (var->variabletype == Variable::Manager::VariableDataType::c_double) {
159  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_valuesDouble[i][0],
160  (MakeROOTCompatible::makeROOTCompatible(varStr) + "[__ncandidates__]/D").c_str());
161  } else if (var->variabletype == Variable::Manager::VariableDataType::c_int) {
162  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_valuesInt[i][0],
163  (MakeROOTCompatible::makeROOTCompatible(varStr) + "[__ncandidates__]/I").c_str());
164  } else if (var->variabletype == Variable::Manager::VariableDataType::c_bool) {
165  m_tree->get().Branch(MakeROOTCompatible::makeROOTCompatible(varStr).c_str(), &m_valuesInt[i][0],
166  (MakeROOTCompatible::makeROOTCompatible(varStr) + "[__ncandidates__]/O").c_str());
167  }
168  m_functions.push_back(var->function);
169  }
170  }
171 
172  m_sampling_name = std::get<0>(m_sampling);
173  m_sampling_rates = std::get<1>(m_sampling);
174 
175  if (m_sampling_name != "") {
177  if (m_sampling_variable == nullptr) {
178  B2FATAL("Couldn't find sample variable " << m_sampling_name << " via the Variable::Manager. Check the name!");
179  }
180  for (const auto& pair : m_sampling_rates)
181  m_sampling_counts[pair.first] = 0;
182  } else {
183  m_sampling_variable = nullptr;
184  }
185 
186 }
187 
188 
190 {
191 
192  if (m_sampling_variable == nullptr)
193  return 1.0;
194 
195  long target = 0;
196  if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_double) {
197  target = std::lround(std::get<double>(m_sampling_variable->function(nullptr)));
198  } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_int) {
199  target = std::lround(std::get<int>(m_sampling_variable->function(nullptr)));
200  } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_bool) {
201  target = std::lround(std::get<bool>(m_sampling_variable->function(nullptr)));
202  }
203 
204  if (m_sampling_rates.find(target) != m_sampling_rates.end() and m_sampling_rates[target] > 0) {
205  m_sampling_counts[target]++;
206  if (m_sampling_counts[target] % m_sampling_rates[target] != 0)
207  return 0;
208  else {
209  m_sampling_counts[target] = 0;
210  return m_sampling_rates[target];
211  }
212  }
213 
214  return 1.0;
215 }
216 
218 {
219  // get counter numbers
220  m_event = m_eventMetaData->getEvent();
221  m_run = m_eventMetaData->getRun();
222  m_experiment = m_eventMetaData->getExperiment();
223  m_production = m_eventMetaData->getProduction();
224 
225  if (m_stringWrapper.isValid())
226  m_MCDecayString = m_stringWrapper->getString();
227  else
228  m_MCDecayString = "";
229 
231  m_ncandidates = particlelist->getListSize();
233  if (m_weight > 0) {
234  for (unsigned int iVar = 0; iVar < m_event_functions.size(); iVar++) {
235  if (std::holds_alternative<double>(m_event_functions[iVar](nullptr))) {
236  m_event_valuesDouble[iVar] = std::get<double>(m_event_functions[iVar](nullptr));
237  } else if (std::holds_alternative<int>(m_event_functions[iVar](nullptr))) {
238  m_event_valuesInt[iVar] = std::get<int>(m_event_functions[iVar](nullptr));
239  } else if (std::holds_alternative<bool>(m_event_functions[iVar](nullptr))) {
240  m_event_valuesInt[iVar] = std::get<bool>(m_event_functions[iVar](nullptr));
241  }
242  }
243  for (unsigned int iPart = 0; iPart < m_ncandidates; iPart++) {
244 
245  if (iPart >= m_maxCandidates) {
246  B2WARNING("Maximum number of candidates exceeded in VariablesToEventBasedTree module. I will skip additional candidates");
247  break;
248  }
249 
250  const Particle* particle = particlelist->getParticle(iPart);
251  for (unsigned int iVar = 0; iVar < m_functions.size(); iVar++) {
252  if (std::holds_alternative<double>(m_functions[iVar](particle))) {
253  m_valuesDouble[iVar][iPart] = std::get<double>(m_functions[iVar](particle));
254  } else if (std::holds_alternative<int>(m_functions[iVar](particle))) {
255  m_valuesInt[iVar][iPart] = std::get<int>(m_functions[iVar](particle));
256  } else if (std::holds_alternative<bool>(m_functions[iVar](particle))) {
257  m_valuesInt[iVar][iPart] = std::get<bool>(m_functions[iVar](particle));
258  }
259  }
260  }
261  m_tree->get().Fill();
262  }
263 }
264 
266 {
268  B2INFO("Writing TTree " << m_treeName);
269  TDirectory::TContext directoryGuard(m_file.get());
270  m_tree->write(m_file.get());
271 
272  const bool writeError = m_file->TestBit(TFile::kWriteError);
273  if (writeError) {
274  B2FATAL("A write error occurred while saving '" << m_fileName << "', please check if enough disk space is available.");
275  }
276  }
277 }
In the store you can park objects that have to be accessed by various modules.
Definition: DataStore.h:51
@ c_DontWriteOut
Object/array should be NOT saved by output modules.
Definition: DataStore.h:71
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
@ c_TerminateInAllProcesses
When using parallel processing, call this module's terminate() function in all processes().
Definition: Module.h:83
Class to store reconstructed particles.
Definition: Particle.h:74
static bool isOutputProcess()
Return true if the process is an output process.
Definition: ProcHandler.cc:232
static bool parallelProcessingUsed()
Returns true if multiple processes have been spawned, false in single-core mode.
Definition: ProcHandler.cc:226
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
std::vector< std::string > resolveCollections(const std::vector< std::string > &variables)
Resolve collections: returns the variable names corresponding to the given collection or, if it is not a collection, the variable name itself.
Definition: Manager.cc:179
const Var * getVariable(std::string name)
Get the variable belonging to the given key.
Definition: Manager.cc:57
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
std::vector< std::string > m_variables
List of variables to save.
unsigned int m_maxCandidates
maximum number of candidates which is written out
std::vector< std::string > m_event_variables
List of event variables to save.
virtual void initialize() override
Initialises the module.
std::map< int, unsigned int > m_sampling_rates
Inverse sampling rates.
std::vector< int > m_event_valuesInt
Values of type int corresponding to given event variables.
virtual void event() override
Method called for each event.
unsigned int m_ncandidates
number of candidates in this event
virtual void terminate() override
Write TTree to file, and close file if necessary.
StoreObjPtr< EventMetaData > m_eventMetaData
event metadata (get event number etc)
std::map< int, unsigned long int > m_sampling_counts
Current number of samples with this value.
std::string m_fileName
Name of ROOT file for output.
std::tuple< std::string, std::map< int, unsigned int > > m_sampling
Tuple of variable name and a map of integer values and inverse sampling rate.
std::vector< std::vector< double > > m_valuesDouble
Values of type double corresponding to given variables.
float getInverseSamplingRateWeight()
Calculate inverse sampling rate weight.
int m_production
production ID (to distinguish MC samples)
StoreObjPtr< StringWrapper > m_stringWrapper
string wrapper storing the MCDecayString
std::vector< Variable::Manager::FunctionPtr > m_functions
List of function pointers corresponding to given variables.
std::string m_particleList
Name of particle list with reconstructed particles.
StoreObjPtr< RootMergeable< TTree > > m_tree
The ROOT TTree for output.
std::shared_ptr< TFile > m_file
ROOT file for output.
std::string m_sampling_name
Variable name of sampling variable.
std::vector< std::vector< int > > m_valuesInt
Values of type int corresponding to given variables.
std::vector< double > m_event_valuesDouble
Values of type double corresponding to given event variables.
const Variable::Manager::Var * m_sampling_variable
Variable Pointer to target variable.
std::vector< Variable::Manager::FunctionPtr > m_event_functions
List of function pointers corresponding to given event variables.
std::string m_MCDecayString
MC decay string to be filled.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::shared_ptr< TFile > getFile(std::string, bool ignoreErrors=false)
Get a file with a specific name; if it does not exist, it will be created.
static RootFileCreationManager & getInstance()
Interface for the FileManager.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:23
VariableDataType variabletype
data type of variable
Definition: Manager.h:133
A variable returning a floating-point value for a given Particle.
Definition: Manager.h:146
FunctionPtr function
Pointer to function.
Definition: Manager.h:147