Belle II Software light-2406-ragdoll
VariablesToNtupleModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <analysis/modules/VariablesToNtuple/VariablesToNtupleModule.h>
10
11// analysis
12#include <analysis/dataobjects/ParticleList.h>
13#include <analysis/VariableManager/Manager.h>
14#include <analysis/VariableManager/Utility.h>
15#include <analysis/dataobjects/StringWrapper.h>
16
17// framework
18#include <framework/logging/Logger.h>
19#include <framework/pcore/ProcHandler.h>
20#include <framework/core/ModuleParam.templateDetails.h>
21#include <framework/core/Environment.h>
22
23// framework - root utilities
24#include <framework/utilities/MakeROOTCompatible.h>
25#include <framework/utilities/RootFileCreationManager.h>
26
27#include <cmath>
28
29using namespace std;
30using namespace Belle2;
31
32// Register module in the framework
33REG_MODULE(VariablesToNtuple);
34
35
37 Module(), m_tree("", DataStore::c_Persistent)
38{
39 //Set module properties
40 setDescription("Calculate variables specified by the user for a given ParticleList and save them into a TNtuple. The TNtuple is candidate-based, meaning that the variables of each candidate are saved into separate rows.");
42
43 vector<string> emptylist;
44 addParam("particleList", m_particleList,
45 "Name of particle list with reconstructed particles. If no list is provided the variables are saved once per event (only possible for event-type variables)",
46 std::string(""));
47 addParam("variables", m_variables,
48 "List of variables (or collections) to save. Variables are taken from Variable::Manager, and are identical to those available to e.g. ParticleSelector.",
49 emptylist);
50
51 addParam("fileName", m_fileName, "Name of ROOT file for output. Can be overridden using the -o argument of basf2.",
52 string("VariablesToNtuple.root"));
53 addParam("treeName", m_treeName, "Name of the NTuple in the saved file.", string("ntuple"));
54 addParam("basketSize", m_basketsize, "Size of baskets in Output NTuple in bytes.", 1600);
55
56 std::tuple<std::string, std::map<int, unsigned int>> default_sampling{"", {}};
57 addParam("sampling", m_sampling,
58 "Tuple of variable name and a map of integer values and inverse sampling rate. E.g. (signal, {1: 0, 0:10}) selects all signal candidates and every 10th background candidate.",
59 default_sampling);
60
61 addParam("signalSideParticleList", m_signalSideParticleList,
62 "Name of signal-side particle list to store the index of the signal-side particle when one calls the module in a for_each loop over the RestOfEvent",
63 std::string(""));
64
65 addParam("fileNameSuffix", m_fileNameSuffix, "The suffix of the output ROOT file to be appended before ``.root``.",
66 string(""));
67
68 addParam("useFloat", m_useFloat,
69 "Use float type for floating-point numbers.", false);
70
71 addParam("storeEventType", m_storeEventType,
72 "If true, the branch __eventType__ is added. The eventType information is available from MC16 on.", true);
73
74 addParam("ignoreCommandLineOverride", m_ignoreCommandLineOverride,
75 "Ignore override of file name via command line argument -o. Useful if you have multiple output modules in one path.", false);
76}
77
79{
80 m_eventMetaData.isRequired();
81 if (not m_particleList.empty())
83
84 // Initializing the output root file
85
86 // override the output file name with what's been provided with the -o option
88 const std::string& outputFileArgument = Environment::Instance().getOutputFileOverride();
89 if (!outputFileArgument.empty())
90 m_fileName = outputFileArgument;
91 }
92
93 if (!m_fileNameSuffix.empty())
94 m_fileName = m_fileName.insert(m_fileName.rfind(".root"), m_fileNameSuffix);
95
96 if (m_fileName.empty()) {
97 B2FATAL("Output root file name is not set. Please set a valid root output file name (\"fileName\" module parameter).");
98 }
99 // See if there is already a file in which case add a new tree to it ...
100 // otherwise create a new file (all handled by framework)
102 if (!m_file) {
103 B2ERROR("Could not create file \"" << m_fileName <<
104 "\". Please set a valid root output file name (\"fileName\" module parameter).");
105 return;
106 }
107
108 TDirectory::TContext directoryGuard(m_file.get());
109
110 // check if TTree with that name already exists
111 if (m_file->Get(m_treeName.c_str())) {
112 B2FATAL("Tree with the name \"" << m_treeName
113 << "\" already exists in the file \"" << m_fileName << "\"\n"
114 << "\nYou probably want to either set the output fileName or the treeName to something else:\n\n"
115 << " from modularAnalysis import variablesToNtuple\n"
116 << " variablesToNtuple('pi+:all', ['p'], treename='pions', filename='variablesToNtuple.root')\n"
117 << " variablesToNtuple('gamma:all', ['p'], treename='photons', filename='variablesToNtuple.root') # two trees, same file\n"
118 << "\n == Or ==\n"
119 << " from modularAnalysis import variablesToNtuple\n"
120 << " variablesToNtuple('pi+:all', ['p'], filename='pions.root')\n"
121 << " variablesToNtuple('gamma:all', ['p'], filename='photons.root') # two files\n"
122 );
123 return;
124 }
125
126 // set up tree and register it in the datastore
128 m_tree.construct(m_treeName.c_str(), "");
129 m_tree->get().SetCacheSize(100000);
130
131 // declare counter branches - pass through variable list, remove counters added by user
132 m_tree->get().Branch("__experiment__", &m_experiment, "__experiment__/I");
133 m_tree->get().Branch("__run__", &m_run, "__run__/I");
134 m_tree->get().Branch("__event__", &m_event, "__event__/i");
135 m_tree->get().Branch("__production__", &m_production, "__production__/I");
136 if (not m_particleList.empty()) {
137 m_tree->get().Branch("__candidate__", &m_candidate, "__candidate__/I");
138 m_tree->get().Branch("__ncandidates__", &m_ncandidates, "__ncandidates__/I");
139 }
140
141 if (not m_signalSideParticleList.empty()) {
143 m_tree->get().Branch("__signalSideCandidate__", &m_signalSideCandidate, "__signalSideCandidate__/I");
144 m_tree->get().Branch("__nSignalSideCandidates__", &m_nSignalSideCandidates, "__nSignalSideCandidates__/I");
145 if (not m_roe.isOptional("RestOfEvent")) {
146 B2WARNING("The signalSideParticleList is set outside of a for_each loop over the RestOfEvent. "
147 << "__signalSideCandidates__ and __nSignalSideCandidate__ will be always -1 and 0, respectively.");
148 }
149 }
150
151 if (m_stringWrapper.isOptional("MCDecayString"))
152 m_tree->get().Branch("__MCDecayString__", &m_MCDecayString);
153
154 if (m_storeEventType) {
155 m_tree->get().Branch("__eventType__", &m_eventType);
156 if (not m_eventExtraInfo.isOptional())
157 B2INFO("EventExtraInfo is not registered. __eventType__ will be empty. The eventType is available from MC16 on.");
158 }
159
160 for (const auto& variable : m_variables)
161 if (Variable::isCounterVariable(variable)) {
162 B2WARNING("The counter '" << variable
163 << "' is handled automatically by VariablesToNtuple, you don't need to add it.");
164 }
165
166 // declare branches and get the variable strings
168 // remove duplicates from list of variables but keep the previous order
169 unordered_set<string> seen;
170 auto newEnd = remove_if(m_variables.begin(), m_variables.end(), [&seen](const string & varStr) {
171 if (seen.find(varStr) != std::end(seen)) return true;
172 seen.insert(varStr);
173 return false;
174 });
175 m_variables.erase(newEnd, m_variables.end());
176
177 if (m_useFloat)
178 m_branchAddressesFloat.resize(m_variables.size() + 1);
179 else
180 m_branchAddressesDouble.resize(m_variables.size() + 1);
181 m_branchAddressesInt.resize(m_variables.size() + 1);
182 if (m_useFloat) {
183 m_tree->get().Branch("__weight__", &m_branchAddressesFloat[0], "__weight__/F");
184 } else {
185 m_tree->get().Branch("__weight__", &m_branchAddressesDouble[0], "__weight__/D");
186 }
187 size_t enumerate = 1;
188 for (const string& varStr : m_variables) {
189 string branchName = MakeROOTCompatible::makeROOTCompatible(varStr);
190
191 // Check for deprecated variables
193
194 // also collection function pointers
196 if (!var) {
197 B2ERROR("Variable '" << varStr << "' is not available in Variable::Manager!");
198 } else {
199 if (m_particleList.empty() && var->description.find("[Eventbased]") == string::npos) {
200 B2ERROR("Variable '" << varStr << "' is not an event-based variable, "
201 "but you are using VariablesToNtuple without a decay string, i.e. in the event-wise mode.\n"
202 "If you have created an event-based alias you can wrap your alias with `eventCached` to "
203 "declare it as event based, which avoids this error.\n\n"
204 "vm.addAlias('myAliasName', 'eventCached(myAlias)')");
205 continue;
206 }
207 if (var->variabletype == Variable::Manager::VariableDataType::c_double) {
208 if (m_useFloat) {
209 m_tree->get().Branch(branchName.c_str(), &m_branchAddressesFloat[enumerate], (branchName + "/F").c_str());
210 } else {
211 m_tree->get().Branch(branchName.c_str(), &m_branchAddressesDouble[enumerate], (branchName + "/D").c_str());
212 }
213 } else if (var->variabletype == Variable::Manager::VariableDataType::c_int) {
214 m_tree->get().Branch(branchName.c_str(), &m_branchAddressesInt[enumerate], (branchName + "/I").c_str());
215 } else if (var->variabletype == Variable::Manager::VariableDataType::c_bool) {
216 m_tree->get().Branch(branchName.c_str(), &m_branchAddressesInt[enumerate], (branchName + "/O").c_str());
217 }
218 m_functions.push_back(std::make_pair(var->function, var->variabletype));
219 }
220 enumerate++;
221 }
222 m_tree->get().SetBasketSize("*", m_basketsize);
223
224 m_sampling_name = std::get<0>(m_sampling);
225 m_sampling_rates = std::get<1>(m_sampling);
226
227 if (m_sampling_name != "") {
229 if (m_sampling_variable == nullptr) {
230 B2FATAL("Couldn't find sample variable " << m_sampling_name << " via the Variable::Manager. Check the name!");
231 }
232 for (const auto& pair : m_sampling_rates)
233 m_sampling_counts[pair.first] = 0;
234 } else {
235 m_sampling_variable = nullptr;
236 }
237}
238
239
241{
242 if (m_sampling_variable == nullptr)
243 return 1.0;
244
245 long target = 0;
246 if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_double) {
247 target = std::lround(std::get<double>(m_sampling_variable->function(particle)));
248 } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_int) {
249 target = std::lround(std::get<int>(m_sampling_variable->function(particle)));
250 } else if (m_sampling_variable->variabletype == Variable::Manager::VariableDataType::c_bool) {
251 target = std::lround(std::get<bool>(m_sampling_variable->function(particle)));
252 }
253 if (m_sampling_rates.find(target) != m_sampling_rates.end() and m_sampling_rates[target] > 0) {
254 m_sampling_counts[target]++;
255 if (m_sampling_counts[target] % m_sampling_rates[target] != 0)
256 return 0;
257 else {
258 m_sampling_counts[target] = 0;
259 return m_sampling_rates[target];
260 }
261 }
262 return 1.0;
263}
264
266{
267 m_event = m_eventMetaData->getEvent();
268 m_run = m_eventMetaData->getRun();
269 m_experiment = m_eventMetaData->getExperiment();
270 m_production = m_eventMetaData->getProduction();
271
272 if (m_stringWrapper.isValid())
273 m_MCDecayString = m_stringWrapper->getString();
274 else
275 m_MCDecayString = "";
276
277 if (m_storeEventType and m_eventExtraInfo.isValid())
278 m_eventType = m_eventExtraInfo->getEventType();
279 else
280 m_eventType = "";
281
282 if (not m_signalSideParticleList.empty()) {
283 if (m_roe.isValid()) {
285 auto signal = m_roe->getRelatedFrom<Particle>();
286 m_signalSideCandidate = signaSideParticleList->getIndex(signal);
287 m_nSignalSideCandidates = signaSideParticleList->getListSize();
288 } else {
291 }
292 }
293
294 if (m_particleList.empty()) {
295 double weight = getInverseSamplingRateWeight(nullptr);
296 if (m_useFloat) {
297 m_branchAddressesFloat[0] = weight;
298 } else {
299 m_branchAddressesDouble[0] = weight;
300 }
301 if (weight > 0) {
302 for (unsigned int iVar = 0; iVar < m_variables.size(); iVar++) {
303 auto var_result = std::get<0>(m_functions[iVar])(nullptr);
304 auto var_type = std::get<1>(m_functions[iVar]);
305 if (std::holds_alternative<double>(var_result)) {
306 if (var_type != Variable::Manager::VariableDataType::c_double)
307 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
308 "'. Expected Variable::Manager::VariableDataType::c_double. Exported data for this variable might be incorrect.");
309 if (m_useFloat) {
310 m_branchAddressesFloat[iVar + 1] = std::get<double>(var_result);
311 } else {
312 m_branchAddressesDouble[iVar + 1] = std::get<double>(var_result);
313 }
314 } else if (std::holds_alternative<int>(var_result)) {
315 if (var_type != Variable::Manager::VariableDataType::c_int)
316 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
317 "'. Expected Variable::Manager::VariableDataType::c_int. Exported data for this variable might be incorrect.");
318 m_branchAddressesInt[iVar + 1] = std::get<int>(var_result);
319 } else if (std::holds_alternative<bool>(var_result)) {
320 if (var_type != Variable::Manager::VariableDataType::c_bool)
321 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
322 "'. Expected Variable::Manager::VariableDataType::c_bool. Exported data for this variable might be incorrect.");
323 m_branchAddressesInt[iVar + 1] = std::get<bool>(var_result);
324 }
325 }
326 m_tree->get().Fill();
327 }
328
329 } else {
331 m_ncandidates = particlelist->getListSize();
332 for (unsigned int iPart = 0; iPart < m_ncandidates; iPart++) {
333 m_candidate = iPart;
334 const Particle* particle = particlelist->getParticle(iPart);
335 double weight = getInverseSamplingRateWeight(particle);
336 if (m_useFloat) {
337 m_branchAddressesFloat[0] = weight;
338 } else {
339 m_branchAddressesDouble[0] = weight;
340 }
341 if (weight > 0) {
342 for (unsigned int iVar = 0; iVar < m_variables.size(); iVar++) {
343 auto var_result = std::get<0>(m_functions[iVar])(particle);
344 auto var_type = std::get<1>(m_functions[iVar]);
345 if (std::holds_alternative<double>(var_result)) {
346 if (var_type != Variable::Manager::VariableDataType::c_double)
347 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
348 "'. Expected Variable::Manager::VariableDataType::c_double. Exported data for this variable might be incorrect.");
349 if (m_useFloat) {
350 m_branchAddressesFloat[iVar + 1] = std::get<double>(var_result);
351 } else {
352 m_branchAddressesDouble[iVar + 1] = std::get<double>(var_result);
353 }
354 } else if (std::holds_alternative<int>(var_result)) {
355 if (var_type != Variable::Manager::VariableDataType::c_int)
356 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
357 "'. Expected Variable::Manager::VariableDataType::c_int. Exported data for this variable might be incorrect.");
358 m_branchAddressesInt[iVar + 1] = std::get<int>(var_result);
359 } else if (std::holds_alternative<bool>(var_result)) {
360 if (var_type != Variable::Manager::VariableDataType::c_bool)
361 B2WARNING("Wrong registered data type for variable '" + m_variables[iVar] +
362 "'. Expected Variable::Manager::VariableDataType::c_bool. Exported data for this variable might be incorrect.");
363 m_branchAddressesInt[iVar + 1] = std::get<bool>(var_result);
364 }
365 }
366 m_tree->get().Fill();
367 }
368 }
369 }
370}
371
373{
375 B2INFO("Writing NTuple " << m_treeName);
376 TDirectory::TContext directoryGuard(m_file.get());
377 m_tree->write(m_file.get());
378
379 const bool writeError = m_file->TestBit(TFile::kWriteError);
380 m_file.reset();
381 if (writeError) {
382 B2FATAL("A write error occurred while saving '" << m_fileName << "', please check if enough disk space is available.");
383 }
384 }
385}
In the store you can park objects that have to be accessed by various modules.
Definition: DataStore.h:51
@ c_DontWriteOut
Object/array should be NOT saved by output modules.
Definition: DataStore.h:71
const std::string & getOutputFileOverride() const
Return overridden output file name, or "" if none was set.
Definition: Environment.h:127
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
@ c_TerminateInAllProcesses
When using parallel processing, call this module's terminate() function in all processes().
Definition: Module.h:83
Class to store reconstructed particles.
Definition: Particle.h:75
static bool isOutputProcess()
Return true if the process is an output process.
Definition: ProcHandler.cc:232
static bool parallelProcessingUsed()
Returns true if multiple processes have been spawned, false in single-core mode.
Definition: ProcHandler.cc:226
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
std::vector< std::string > resolveCollections(const std::vector< std::string > &variables)
Resolve Collection Returns variable names corresponding to the given collection or if it is not a col...
Definition: Manager.cc:179
const Var * getVariable(std::string name)
Get the variable belonging to the given key.
Definition: Manager.cc:57
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:25
void checkDeprecatedVariable(const std::string &name)
Check if a variable is deprecated.
Definition: Manager.cc:443
StoreObjPtr< RestOfEvent > m_roe
ROE object.
bool m_useFloat
Use float type for floating-point numbers.
std::vector< std::string > m_variables
List of variables to save.
std::vector< float > m_branchAddressesFloat
Branch addresses of variables of type float.
virtual void initialize() override
Initialises the module.
std::map< int, unsigned int > m_sampling_rates
Inverse sampling rates.
virtual void event() override
Method called for each event.
unsigned int m_ncandidates
total n candidates
virtual void terminate() override
Write TTree to file, and close file if necessary.
StoreObjPtr< EventMetaData > m_eventMetaData
the event information
std::map< int, unsigned long int > m_sampling_counts
Current number of samples with this value.
std::string m_fileName
Name of ROOT file for output.
std::vector< std::pair< Variable::Manager::FunctionPtr, Variable::Manager::VariableDataType > > m_functions
List of pairs of function pointers and respective data type corresponding to given variables.
std::tuple< std::string, std::map< int, unsigned int > > m_sampling
Tuple of variable name and a map of integer values and inverse sampling rate.
std::vector< int > m_branchAddressesInt
Branch addresses of variables of type int (or bool)
int m_basketsize
Size of TBaskets in the output ROOT file in bytes.
int m_production
production ID (to distinguish MC samples)
unsigned int m_nSignalSideCandidates
total n signal-side candidates
StoreObjPtr< StringWrapper > m_stringWrapper
string wrapper storing the MCDecayString
bool m_storeEventType
If true, the branch eventType is added.
std::string m_particleList
Name of particle list with reconstructed particles.
bool m_ignoreCommandLineOverride
if true, ignore override of filename
std::string m_eventType
EventType to be filled.
StoreObjPtr< RootMergeable< TTree > > m_tree
The ROOT TNtuple for output.
std::shared_ptr< TFile > m_file
ROOT file for output.
std::vector< double > m_branchAddressesDouble
Branch addresses of variables of type double.
std::string m_sampling_name
Variable name of sampling variable.
float getInverseSamplingRateWeight(const Particle *particle)
Calculate inverse sampling rate weight.
StoreObjPtr< EventExtraInfo > m_eventExtraInfo
pointer to EventExtraInfo
std::string m_treeName
Name of the TTree.
std::string m_signalSideParticleList
Name of signal-side particle list
const Variable::Manager::Var * m_sampling_variable
Variable Pointer to target variable.
int m_signalSideCandidate
signal-side candidate counter
std::string m_fileNameSuffix
Suffix to be appended to the output file name.
std::string m_MCDecayString
MC decay string to be filled.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::shared_ptr< TFile > getFile(std::string, bool ignoreErrors=false)
Get a file with a specific name, if it does not exist it will be created.
static RootFileCreationManager & getInstance()
Interface for the FileManager.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24
STL namespace.
VariableDataType variabletype
data type of variable
Definition: Manager.h:133
A variable returning a floating-point value for a given Particle.
Definition: Manager.h:146
FunctionPtr function
Pointer to function.
Definition: Manager.h:147