Belle II Software  light-2205-abys
VariablesToNtupleModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <analysis/modules/VariablesToNtuple/VariablesToNtupleModule.h>
10 
11 // analysis
12 #include <analysis/dataobjects/ParticleList.h>
13 #include <analysis/VariableManager/Manager.h>
14 #include <analysis/VariableManager/Utility.h>
15 #include <analysis/dataobjects/StringWrapper.h>
16 
17 // framework
18 #include <framework/logging/Logger.h>
19 #include <framework/pcore/ProcHandler.h>
20 #include <framework/core/ModuleParam.templateDetails.h>
21 
22 // framework - root utilities
23 #include <framework/utilities/MakeROOTCompatible.h>
24 #include <framework/utilities/RootFileCreationManager.h>
25 
26 #include <cmath>
27 
28 using namespace std;
29 using namespace Belle2;
30 
31 // Register module in the framework
32 REG_MODULE(VariablesToNtuple);
33 
34 
/**
 * Constructor.
 * Initialises the Module base and m_tree (empty name, DataStore::c_Persistent), sets the
 * module description and declares the steering parameters particleList, variables,
 * fileName, treeName, basketSize and sampling together with their default values.
 */
35 VariablesToNtupleModule::VariablesToNtupleModule() :
36  Module(), m_tree("", DataStore::c_Persistent)
37 {
38  //Set module properties
39  setDescription("Calculate variables specified by the user for a given ParticleList and save them into a TNtuple. The TNtuple is candidate-based, meaning that the variables of each candidate are saved separate rows.");
// NOTE(review): listing line 40 is absent from this extract. Presumably it is a
// setPropertyFlags(...) call, since that symbol is listed in the cross references
// of this page - confirm against the repository.
41 

// Default for the variables parameter: an empty list.
42  vector<string> emptylist;
43  addParam("particleList", m_particleList,
44  "Name of particle list with reconstructed particles. If no list is provided the variables are saved once per event (only possible for event-type variables)",
45  std::string(""));
46  addParam("variables", m_variables,
47  "List of variables (or collections) to save. Variables are taken from Variable::Manager, and are identical to those available to e.g. ParticleSelector.",
48  emptylist);
49 
50  addParam("fileName", m_fileName, "Name of ROOT file for output.", string("VariablesToNtuple.root"));
51  addParam("treeName", m_treeName, "Name of the NTuple in the saved file.", string("ntuple"));
52  addParam("basketSize", m_basketsize, "Size of baskets in Output NTuple in bytes.", 1600);
53 

// Default for the sampling parameter: empty variable name and empty rate map.
// initialize() treats an empty name as sampling switched off.
54  std::tuple<std::string, std::map<int, unsigned int>> default_sampling{"", {}};
55  addParam("sampling", m_sampling,
56  "Tuple of variable name and a map of integer values and inverse sampling rate. E.g. (signal, {1: 0, 0:10}) selects all signal candidates and every 10th background candidate.",
57  default_sampling);
58 }
59 
// Body of initialize(): validates the output file and tree name, creates the tree,
// declares all branches and prepares the optional sampling.
// NOTE(review): listing line 60 is absent from this extract. Presumably it holds the
// signature of initialize(), which the cross references of this page describe as
// initialising the module - confirm against the repository.
61 {
62  m_eventMetaData.isRequired();
63  if (not m_particleList.empty())
// NOTE(review): listing line 64, the statement guarded by the if above, is absent
// from this extract.
65 
66  // Initializing the output root file
67  if (m_fileName.empty()) {
68  B2FATAL("Output root file name is not set. Please set a valid root output file name (\"fileName\" module parameter).");
69  }
70  // See if there is already a file in which case add a new tree to it ...
71  // otherwise create a new file (all handled by framework)
// NOTE(review): listing line 72 is absent from this extract. Presumably m_file is
// obtained there - confirm against the repository.
73  if (!m_file) {
74  B2ERROR("Could not create file \"" << m_fileName <<
75  "\". Please set a valid root output file name (\"fileName\" module parameter).");
76  return;
77  }
78 
// Directory context object bound to the output file for the rest of this scope.
79  TDirectory::TContext directoryGuard(m_file.get());
80 
81  // check if TTree with that name already exists
82  if (m_file->Get(m_treeName.c_str())) {
83  B2FATAL("Tree with the name \"" << m_treeName
84  << "\" already exists in the file \"" << m_fileName << "\"\n"
85  << "\nYou probably want to either set the output fileName or the treeName to something else:\n\n"
86  << " from modularAnalysis import variablesToNtuple\n"
87  << " variablesToNtuple('pi+:all', ['p'], treename='pions', filename='variablesToNtuple.root')\n"
88  << " variablesToNtuple('gamma:all', ['p'], treename='photons', filename='variablesToNtuple.root') # two trees, same file\n"
89  << "\n == Or ==\n"
90  << " from modularAnalysis import variablesToNtuple\n"
91  << " variablesToNtuple('pi+:all', ['p'], filename='pions.root')\n"
92  << " variablesToNtuple('gamma:all', ['p'], filename='photons.root') # two files\n"
93  );
94  return;
95  }
96 
97  // set up tree and register it in the datastore
// The datastore name is the concatenation of file name and tree name; the entry is
// flagged DataStore::c_DontWriteOut.
98  m_tree.registerInDataStore(m_fileName + m_treeName, DataStore::c_DontWriteOut);
99  m_tree.construct(m_treeName.c_str(), "");
100  m_tree->get().SetCacheSize(100000);
101 
102  // declare counter branches - pass through variable list, remove counters added by user
// Bookkeeping branches filled from the event meta data in event(). The two candidate
// branches are declared only when a particle list name was given.
103  m_tree->get().Branch("__experiment__", &m_experiment, "__experiment__/I");
104  m_tree->get().Branch("__run__", &m_run, "__run__/I");
105  m_tree->get().Branch("__event__", &m_event, "__event__/i");
106  m_tree->get().Branch("__production__", &m_production, "__production__/I");
107  if (not m_particleList.empty()) {
108  m_tree->get().Branch("__candidate__", &m_candidate, "__candidate__/I");
109  m_tree->get().Branch("__ncandidates__", &m_ncandidates, "__ncandidates__/I");
110  }
111 
// The decay string branch is declared only if isOptional returns true for MCDecayString.
112  if (m_stringWrapper.isOptional("MCDecayString"))
113  m_tree->get().Branch("__MCDecayString__", &m_MCDecayString);
114 
// Counter variables requested by the user only trigger a warning in this loop.
115  for (const auto& variable : m_variables)
116  if (Variable::isCounterVariable(variable)) {
117  B2WARNING("The counter '" << variable
118  << "' is handled automatically by VariablesToNtuple, you don't need to add it.");
119  }
120 
121  // declare branches and get the variable strings
// NOTE(review): listing line 122 is absent from this extract.
123  // remove duplicates from list of variables but keep the previous order
// The predicate is true for every name already seen, so the first occurrence stays.
124  unordered_set<string> seen;
125  auto newEnd = remove_if(m_variables.begin(), m_variables.end(), [&seen](const string & varStr) {
126  if (seen.find(varStr) != std::end(seen)) return true;
127  seen.insert(varStr);
128  return false;
129  });
130  m_variables.erase(newEnd, m_variables.end());
131 
// Both value buffers get one slot more than there are variables. Slot 0 of the double
// buffer backs the __weight__ branch, so variable slots start at index 1.
132  m_branchAddressesDouble.resize(m_variables.size() + 1);
133  m_branchAddressesInt.resize(m_variables.size() + 1);
134  m_tree->get().Branch("__weight__", &m_branchAddressesDouble[0], "__weight__/D");
135  size_t enumerate = 1;
136  for (const string& varStr : m_variables) {
137  string branchName = MakeROOTCompatible::makeROOTCompatible(varStr);
138 
139  // Check for deprecated variables
// NOTE(review): listing line 140 is absent from this extract.
141 
142  // also collection function pointers
// NOTE(review): listing line 143 is absent from this extract. Presumably var is looked
// up there - confirm against the repository.
144  if (!var) {
145  B2ERROR("Variable '" << varStr << "' is not available in Variable::Manager!");
146  } else {
// Without a particle list only variables whose description contains [Eventbased]
// are accepted.
147  if (m_particleList.empty() && var->description.find("[Eventbased]") == string::npos) {
148  B2ERROR("Variable '" << varStr << "' is not an event-based variable, "
149  "but you are using VariablesToNtuple without a decay string, i.e. in the event-wise mode.\n"
150  "If you have created an event-based alias you can wrap your alias with `eventCached` to "
151  "declare it as event based, which avoids this error.\n\n"
152  "vm.addAlias('myAliasName', 'eventCached(myAlias)')");
// NOTE(review): this continue skips both the push_back into m_functions and the
// enumerate increment below, while event() indexes m_functions and m_variables with the
// same index. Whether B2ERROR stops processing before event() runs is not visible here.
153  continue;
154  }
// Doubles use the double buffer. Int and bool variables both use the int buffer and
// differ only in the leaf type code (I or O).
155  if (var->variabletype == Variable::Manager::VariableDataType::c_double) {
156  m_tree->get().Branch(branchName.c_str(), &m_branchAddressesDouble[enumerate], (branchName + "/D").c_str());
157  } else if (var->variabletype == Variable::Manager::VariableDataType::c_int) {
158  m_tree->get().Branch(branchName.c_str(), &m_branchAddressesInt[enumerate], (branchName + "/I").c_str());
159  } else if (var->variabletype == Variable::Manager::VariableDataType::c_bool) {
160  m_tree->get().Branch(branchName.c_str(), &m_branchAddressesInt[enumerate], (branchName + "/O").c_str());
161  }
162  m_functions.push_back(std::make_pair(var->function, var->variabletype));
163  }
164  enumerate++;
165  }
// Apply the configured basket size to all branches.
166  m_tree->get().SetBasketSize("*", m_basketsize);
167 
// Unpack the sampling parameter into the variable name and the rate map.
168  m_sampling_name = std::get<0>(m_sampling);
169  m_sampling_rates = std::get<1>(m_sampling);
170 
171  if (m_sampling_name != "") {
// NOTE(review): listing line 172 is absent from this extract. Presumably
// m_sampling_variable is looked up there - confirm against the repository.
173  if (m_sampling_variable == nullptr) {
174  B2FATAL("Couldn't find sample variable " << m_sampling_name << " via the Variable::Manager. Check the name!");
175  }
// Start one counter at zero for every value that has a configured rate.
176  for (const auto& pair : m_sampling_rates)
177  m_sampling_counts[pair.first] = 0;
178  } else {
179  m_sampling_variable = nullptr;
180  }
181 }
182 
183 
// Body of getInverseSamplingRateWeight(): returns the weight for one entry.
// Returns 1.0 when no sampling variable is set or when the rounded value of the sampling
// variable has no positive rate configured. Otherwise it returns 0 for all but every
// rate-th entry with that value, and the rate itself for the entry that is kept.
// NOTE(review): listing line 184 is absent from this extract. Presumably it holds the
// signature, listed in the cross references of this page as
// float getInverseSamplingRateWeight(const Particle *particle) - confirm against the repository.
185 {
186  if (m_sampling_variable == nullptr)
187  return 1.0;
188 
// Evaluate the sampling variable for this particle and round the result to an integer.
189  long target = 0;
190  if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_double) {
191  target = std::lround(std::get<double>(m_sampling_variable->function(particle)));
192  } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_int) {
193  target = std::lround(std::get<int>(m_sampling_variable->function(particle)));
194  } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_bool) {
195  target = std::lround(std::get<bool>(m_sampling_variable->function(particle)));
196  }
// A missing entry or a rate of 0 falls through to the final return of 1.0.
197  if (m_sampling_rates.find(target) != m_sampling_rates.end() and m_sampling_rates[target] > 0) {
198  m_sampling_counts[target]++;
199  if (m_sampling_counts[target] % m_sampling_rates[target] != 0)
200  return 0;
201  else {
// The counter reached a multiple of the rate: reset it and return the rate as weight.
202  m_sampling_counts[target] = 0;
203  return m_sampling_rates[target];
204  }
205  }
206  return 1.0;
207 }
208 
// Body of event(): copies the event meta data and the optional MC decay string into the
// branch variables, then evaluates all variables and fills the tree, once per event when
// no particle list is set, otherwise once per candidate of the list.
// NOTE(review): listing line 209 is absent from this extract. Presumably it holds the
// signature of event(), which the cross references of this page describe as the method
// called for each event - confirm against the repository.
210 {
211  m_event = m_eventMetaData->getEvent();
212  m_run = m_eventMetaData->getRun();
213  m_experiment = m_eventMetaData->getExperiment();
214  m_production = m_eventMetaData->getProduction();
215 
// Use an empty decay string when the string wrapper is not valid.
216  if (m_stringWrapper.isValid())
217  m_MCDecayString = m_stringWrapper->getString();
218  else
219  m_MCDecayString = "";
220 
// Event-wise mode: every variable function is called with nullptr.
221  if (m_particleList.empty()) {
// NOTE(review): listing line 222 is absent from this extract. Presumably the weight in
// slot 0 of m_branchAddressesDouble is set there - confirm against the repository.
// A row is filled only when that weight is positive.
223  if (m_branchAddressesDouble[0] > 0) {
224  for (unsigned int iVar = 0; iVar < m_variables.size(); iVar++) {
225  auto var_result = std::get<0>(m_functions[iVar])(nullptr);
226  auto var_type = std::get<1>(m_functions[iVar]);
// The buffer is chosen by the type actually held in the result. A mismatch with the
// registered type only produces a warning. Slot iVar + 1 is used because slot 0 holds
// the weight.
227  if (std::holds_alternative<double>(var_result)) {
228  if (var_type != Variable::Manager::VariableDataType::c_double)
229  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
230  "'. Expected Variable::Manager::VariableDataType::c_double. Exported data for this variable might be incorrect.");
231  m_branchAddressesDouble[iVar + 1] = std::get<double>(var_result);
232  } else if (std::holds_alternative<int>(var_result)) {
233  if (var_type != Variable::Manager::VariableDataType::c_int)
234  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
235  "'. Expected Variable::Manager::VariableDataType::c_int. Exported data for this variable might be incorrect.");
236  m_branchAddressesInt[iVar + 1] = std::get<int>(var_result);
237  } else if (std::holds_alternative<bool>(var_result)) {
238  if (var_type != Variable::Manager::VariableDataType::c_bool)
239  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
240  "'. Expected Variable::Manager::VariableDataType::c_bool. Exported data for this variable might be incorrect.");
241  m_branchAddressesInt[iVar + 1] = std::get<bool>(var_result);
242  }
243  }
244  m_tree->get().Fill();
245  }
246 
// Candidate-wise mode: one potential row per particle of the list.
247  } else {
// NOTE(review): listing line 248 is absent from this extract. Presumably particlelist
// is obtained there - confirm against the repository.
249  m_ncandidates = particlelist->getListSize();
250  for (unsigned int iPart = 0; iPart < m_ncandidates; iPart++) {
251  m_candidate = iPart;
252  const Particle* particle = particlelist->getParticle(iPart);
// NOTE(review): listing line 253 is absent from this extract. Presumably the weight in
// slot 0 of m_branchAddressesDouble is set for this particle there - confirm against
// the repository. A row is filled only when that weight is positive.
254  if (m_branchAddressesDouble[0] > 0) {
255  for (unsigned int iVar = 0; iVar < m_variables.size(); iVar++) {
256  auto var_result = std::get<0>(m_functions[iVar])(particle);
257  auto var_type = std::get<1>(m_functions[iVar]);
// Same type dispatch as in the event-wise branch above.
258  if (std::holds_alternative<double>(var_result)) {
259  if (var_type != Variable::Manager::VariableDataType::c_double)
260  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
261  "'. Expected Variable::Manager::VariableDataType::c_double. Exported data for this variable might be incorrect.");
262  m_branchAddressesDouble[iVar + 1] = std::get<double>(var_result);
263  } else if (std::holds_alternative<int>(var_result)) {
264  if (var_type != Variable::Manager::VariableDataType::c_int)
265  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
266  "'. Expected Variable::Manager::VariableDataType::c_int. Exported data for this variable might be incorrect.");
267  m_branchAddressesInt[iVar + 1] = std::get<int>(var_result);
268  } else if (std::holds_alternative<bool>(var_result)) {
269  if (var_type != Variable::Manager::VariableDataType::c_bool)
270  B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
271  "'. Expected Variable::Manager::VariableDataType::c_bool. Exported data for this variable might be incorrect.");
272  m_branchAddressesInt[iVar + 1] = std::get<bool>(var_result);
273  }
274  }
275  m_tree->get().Fill();
276  }
277  }
278  }
279 }
280 
// Body of terminate(): writes the tree into the output file, releases the file handle
// and aborts with a fatal message if the file reported a write error.
// NOTE(review): listing line 281 is absent from this extract. Presumably it holds the
// signature of terminate(), which the cross references of this page describe as writing
// the TTree to file - confirm against the repository.
282 {
// NOTE(review): listing line 283 is absent from this extract. Presumably it opens the
// braced statement that is closed on listing line 293 - confirm against the repository.
284  B2INFO("Writing NTuple " << m_treeName);
285  TDirectory::TContext directoryGuard(m_file.get());
286  m_tree->write(m_file.get());
287 
// Read the error bit before m_file is reset, because the handle is no longer usable
// through m_file afterwards.
288  const bool writeError = m_file->TestBit(TFile::kWriteError);
289  m_file.reset();
290  if (writeError) {
291  B2FATAL("A write error occurred while saving '" << m_fileName << "', please check if enough disk space is available.");
292  }
293  }
294 }
In the store you can park objects that have to be accessed by various modules.
Definition: DataStore.h:51
@ c_DontWriteOut
Object/array should be NOT saved by output modules.
Definition: DataStore.h:71
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
@ c_TerminateInAllProcesses
When using parallel processing, call this module's terminate() function in all processes.
Definition: Module.h:83
Class to store reconstructed particles.
Definition: Particle.h:74
static bool isOutputProcess()
Return true if the process is an output process.
Definition: ProcHandler.cc:232
static bool parallelProcessingUsed()
Returns true if multiple processes have been spawned, false in single-core mode.
Definition: ProcHandler.cc:226
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
std::vector< std::string > resolveCollections(const std::vector< std::string > &variables)
Resolve Collection Returns variable names corresponding to the given collection or if it is not a col...
Definition: Manager.cc:167
const Var * getVariable(std::string name)
Get the variable belonging to the given key.
Definition: Manager.cc:57
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
void checkDeprecatedVariable(const std::string &name)
Check if a variable is deprecated.
Definition: Manager.cc:431
std::vector< std::string > m_variables
List of variables to save.
virtual void initialize() override
Initialises the module.
std::map< int, unsigned int > m_sampling_rates
Inverse sampling rates.
virtual void event() override
Method called for each event.
unsigned int m_ncandidates
Total number of candidates.
virtual void terminate() override
Write TTree to file, and close file if necessary.
StoreObjPtr< EventMetaData > m_eventMetaData
the event information
std::map< int, unsigned long int > m_sampling_counts
Current number of samples with this value.
std::string m_fileName
Name of ROOT file for output.
std::vector< std::pair< Variable::Manager::FunctionPtr, Variable::Manager::VariableDataType > > m_functions
List of pairs of function pointers and respective data type corresponding to given variables.
std::tuple< std::string, std::map< int, unsigned int > > m_sampling
Tuple of variable name and a map of integer values and inverse sampling rate.
std::vector< int > m_branchAddressesInt
Branch addresses of variables of type int (or bool)
int m_basketsize
Size of TBaskets in the output ROOT file in bytes.
int m_production
production ID (to distinguish MC samples)
StoreObjPtr< StringWrapper > m_stringWrapper
string wrapper storing the MCDecayString
std::string m_particleList
Name of particle list with reconstructed particles.
StoreObjPtr< RootMergeable< TTree > > m_tree
The ROOT TNtuple for output.
std::shared_ptr< TFile > m_file
ROOT file for output.
std::vector< double > m_branchAddressesDouble
Branch addresses of variables of type double.
std::string m_sampling_name
Variable name of sampling variable.
float getInverseSamplingRateWeight(const Particle *particle)
Calculate inverse sampling rate weight.
std::string m_treeName
Name of the TTree.
const Variable::Manager::Var * m_sampling_variable
Variable Pointer to target variable.
std::string m_MCDecayString
MC decay string to be filled.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::shared_ptr< TFile > getFile(std::string, bool ignoreErrors=false)
Get a file with a specific name; if it does not exist, it will be created.
static RootFileCreationManager & getInstance()
Interface for the FileManager.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:23
VariableDataType variabletype
data type of variable
Definition: Manager.h:133
A variable returning a floating-point value for a given Particle.
Definition: Manager.h:146
FunctionPtr function
Pointer to function.
Definition: Manager.h:147