Belle II Software development
RootInputModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include <framework/modules/rootio/RootInputModule.h>
11
12#include <framework/io/RootIOUtilities.h>
13#include <framework/io/RootFileInfo.h>
14#include <framework/core/FileCatalog.h>
15#include <framework/core/InputController.h>
16#include <framework/pcore/Mergeable.h>
17#include <framework/datastore/StoreObjPtr.h>
18#include <framework/datastore/DataStore.h>
19#include <framework/datastore/DependencyMap.h>
20#include <framework/dataobjects/EventMetaData.h>
21#include <framework/utilities/NumberSequence.h>
22#include <framework/utilities/ScopeGuard.h>
23#include <framework/database/Configuration.h>
24
25#include <TClonesArray.h>
26#include <TEventList.h>
27#include <TObjArray.h>
28#include <TChainElement.h>
29#include <TError.h>
30
31#include <iomanip>
32
33using namespace std;
34using namespace Belle2;
35using namespace RootIOUtilities;
36
37REG_MODULE(RootInput);
38
/**
 * Constructor: sets the module description and registers the steerable parameters.
 *
 * The initialiser list sets m_nextEntry to 0, m_lastPersistentEntry to -1 and
 * both chain pointers (m_tree, m_persistent) to nullptr.
 *
 * NOTE(review): in this listing the opening lines of four addParam(...) calls
 * (the branch name / exclude-branch name parameters) are absent; only their
 * description strings and default values are visible. Confirm against the
 * complete file before editing those parameters.
 */
39RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
40{
41 //Set module properties
42 setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
44
45 //Parameter definition
 // shared default value for all list-of-strings parameters that start out empty
46 vector<string> emptyvector;
47 addParam("inputFileName", m_inputFileName,
48 "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
49 string(""));
50 addParam("inputFileNames", m_inputFileNames,
51 "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
52 emptyvector);
53 addParam("entrySequences", m_entrySequences,
54 "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55 "Must be specified exactly once for each file to be opened."
56 "The first event has the entry number 0.", emptyvector);
57 addParam("ignoreCommandLineOverride", m_ignoreCommandLineOverride,
58 "Ignore override of file name via command line argument -i.", false);
59
60 addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
 // the default is the member's current value (its in-class initialiser is not visible here)
61 addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
62 "the specified (experiment, run, event number) occurs. This parameter "
63 "is useful for debugging to start with a specific event.", m_skipToEvent);
64
 // NOTE(review): the first line of each of the next four addParam(...) calls is missing from this listing
66 "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
68 "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
69
71 "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
 // persistent branches excluded by default: only "ProcessStatistics"
72 vector<string> excludePersistent({"ProcessStatistics"});
74 "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
75
76 addParam("parentLevel", m_parentLevel,
77 "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://xwiki.desy.de/xwiki/rest/p/077a2 for details.",
78 0);
79
80 addParam("collectStatistics", m_collectStatistics,
81 "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
82 false);
83 addParam("cacheSize", m_cacheSize,
84 "file cache size in Mbytes. If negative, use root default", 0);
85
86 addParam("discardErrorEvents", m_discardErrorEvents,
87 "Discard events with an error flag != 0", m_discardErrorEvents);
 // NOTE(review): the parameter name is spelled with three 'r' ("silentErrrorDiscardMask");
 // it is part of the steering interface, so it must not be "corrected" without a migration path
88 addParam("silentErrrorDiscardMask", m_discardErrorMask,
89 "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90 "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
91 "present in this mask", m_discardErrorMask);
92
93 addParam("isSecondaryInput", m_isSecondaryInput,
94 "When using a second RootInputModule in an independent path [usually if you are using add_independent_merge_path(...)] "
95 "this has to be set to true",
96 false);
97}
98
100
// Body of the module set-up step (presumably RootInputModule::initialize(); the
// signature line is absent from this listing, as are several interior lines,
// so some statements below appear without their opening line).
//
// Visible behaviour:
//  * applies the skipNEvents / entrySequences overrides from the Environment,
//  * validates the file name parameters and aborts (B2FATAL) on inconsistencies,
//  * reads the FileMetaData of every input file, skips empty files, checks that
//    data and MC are not mixed and that all files provide the same branches,
//  * adds each accepted file to the event chain (m_tree) and the persistent chain
//    (m_persistent),
//  * builds a TEventList when entry sequences are given,
//  * validates skipToEvent / parentLevel and forwards the summed number of MC events.
102{
103 unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
104 if (skipNEventsOverride != 0)
105 m_skipNEvents = skipNEventsOverride;
106
107 auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
108 if (entrySequencesOverride.size() > 0)
109 m_entrySequences = entrySequencesOverride;
110
114
115 const vector<string>& inputFiles = getFileNames();
116 if (inputFiles.empty()) {
117 B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
118 }
119 if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
120 B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
121 }
 // NOTE(review): the statement that fills m_inputFileNames between these two checks is not visible here
123 if (m_inputFileNames.empty()) {
124 B2FATAL("No valid files specified!");
125 }
126
127 if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
128 B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
129 << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
130 }
131
132 m_inputFileName = "";
133 // we'll only use m_inputFileNames from now on
134
135 // so let's create the chain objects ...
136 m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
137 m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
138
139 // time for sanity checks. The problem is that this needs to read a few bytes
140 // from every input file so for jobs with large amount of input files this
141 // will not be efficient.
142 // TODO: We might want to create a different input module which will not
143 // check anything and require manual input like the number of events in
144 // each file and the global tags to use. That would be more efficient
145 // but also less safe
146
147 // list of required branches. We keep this empty for now and only fill
148 // it after we checked the first file to make sure all other files have the
149 // same branches available.
150 std::set<std::string> requiredEventBranches;
151 std::set<std::string> requiredPersistentBranches;
152 // Event metadata from all files, keep it around for sanity checks and globaltag replay
153 std::vector<FileMetaData> fileMetaData;
154 // and if so, what is the sum
 // (the accumulator uses exactly the return type of FileMetaData::getMcEvents())
155 std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
156
157 // scope for local variables
158 {
159 // temporarily disable some root warnings
160 auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
161 // do all files have a consistent number of MC events? that is all positive or all zero
162 bool validInputMCEvents{true};
163 for (const string& fileName : m_inputFileNames) {
164 // read metadata and create sum of MCEvents and global tags
 // any std::exception thrown inside this try block is turned into a B2FATAL naming the file
165 try {
166 RootIOUtilities::RootFileInfo fileInfo(fileName);
167 FileMetaData meta = fileInfo.getFileMetaData();
168 if (meta.getNEvents() == 0) {
169 B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
170 continue;
171 }
172 realDataWorkaround(meta);
173 fileMetaData.push_back(meta);
174 // make sure we only look at data or MC. For the first file this is trivially true
175 if (fileMetaData.front().isMC() != meta.isMC()) {
176 throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
177 }
178 // accumulate number of inputMCEvents now
179 if (validInputMCEvents) {
180 // make sure that all files have either a non-zero or zero mcevents.
181 if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
182 B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
183 validInputMCEvents = false;
184 }
185 // So accumulate the number of MCEvents but let's be careful to not have an overflow here
186 if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
187 B2FATAL("Number of MC events is too large and cannot be represented anymore");
188 }
189 }
190 // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
191 if (requiredEventBranches.empty()) {
192 // make sure we have event meta data
193 fileInfo.checkMissingBranches({"EventMetaData"}, false);
194 requiredEventBranches = fileInfo.getBranchNames(false);
195 // filter the branches depending on what the user selected. Note we
196 // do the same thing again in connectBranches but we leave it like
197 // that because we also want to read branches from parent files
198 // selectively and thus we need to filter the branches there anyway.
199 // Here we just do it to ensure all files we read directly (which is
200 // 99% of the use case) contain all the branches we want.
 // NOTE(review): the remaining arguments of this filterBranches(...) call are missing from this listing
201 requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
203 // but make sure we always have EventMetaData ...
204 requiredEventBranches.emplace("EventMetaData");
205
206 // Same for persistent data ...
207 requiredPersistentBranches = fileInfo.getBranchNames(true);
208 // filter the branches depending on what the user selected
 // NOTE(review): the remaining arguments of this filterBranches(...) call are missing from this listing
209 requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
211 } else {
212 // ok we already have the list ... so let's make sure following files have the same branches
213 fileInfo.checkMissingBranches(requiredEventBranches, false);
214 fileInfo.checkMissingBranches(requiredPersistentBranches, true);
215 }
216 // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
217 if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
218 throw std::runtime_error("Could not add file to TChain");
219 }
220 B2INFO("Added file " + fileName);
221 } catch (std::exception& e) {
222 B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
223 }
224 }
225 }
226
227 if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
228 // Set cache size TODO: find out if files are remote and use a bigger default
229 // value if at least one file is non-local
 // m_cacheSize is given in MBytes, SetCacheSize is passed the value converted to bytes
230 if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
231
232 // Check if the files we added to the Chain are unique,
233 // if the same file is added multiple times the TEventList used for the eventSequence feature
234 // will process each file only once with the union of both given sequences.
235 // It is not clear if the user wants this, so we raise a fatal in this situation.
236 {
237 std::set<std::string> unique_filenames;
238
239 // The following lines are directly from the ROOT documentation
240 // see TChain::AddFile
241 TObjArray* fileElements = m_tree->GetListOfFiles();
242 TIter next(fileElements);
243 TChainElement* chEl = nullptr;
244 while ((chEl = (TChainElement*)next())) {
245 if (!unique_filenames.insert(chEl->GetTitle()).second) {
246 B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
247 // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
248 m_processingAllEvents = false;
249 }
250 }
251 if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
252 B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
253 "Please specify each file only once if you're using the sequence feature!");
254 }
255 }
256
257 if (m_entrySequences.size() > 0) {
 // translate the per-file entry sequences into one list of chain-global entry numbers
258 auto* elist = new TEventList("input_event_list");
259 for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
 // [offset, next_offset) is the range of global entry numbers belonging to file iFile
260 int64_t offset = m_tree->GetTreeOffset()[iFile];
261 int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
262 // check if Sequence consists only of ':', e.g. the whole file is requested
263 if (m_entrySequences[iFile] == ":") {
264 for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
265 elist->Enter(global_entry);
266 } else {
267 for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
268 int64_t global_entry = entry + offset;
269 if (global_entry >= next_offset) {
270 B2WARNING("Given sequence contains entry numbers which are out of range. "
271 "I won't add any further events to the EventList for the current file.");
272 break;
273 } else {
274 elist->Enter(global_entry);
275 }
276 }
277 }
278 }
279 m_tree->SetEventList(elist);
280 }
281
282 B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
283 B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
284
287
 // NOTE(review): the condition and parts of both branches of the following if/else are missing from this listing
289 delete m_tree;
290 m_tree = nullptr; //don't try to read from there
291 } else {
294 }
295
296 if (m_parentLevel > 0) {
298 } else if (m_parentLevel < 0) {
299 B2ERROR("parentLevel must be >= 0!");
300 return;
301 }
302
303 // Let's check if we process everything
304 // * all filenames unique (already done above)
305 // * no event skipping either with skipN, entry sequences or skipToEvent
306 // * no -n or process(path, N) with N <= the number of entries in our files
307 unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
310
311 if (!m_skipToEvent.empty()) {
312 // Skipping to some specific event is also not processing all events ...
313 m_processingAllEvents = false;
314 // make sure the number of entries is exactly 3
315 if (m_skipToEvent.size() != 3) {
316 B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
317 // ignore the value
318 m_skipToEvent.clear();
319 } else {
321 }
322 if (m_nextEntry > 0) {
323 B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
324 "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
325 //force the number of skipped events to be zero
326 m_nextEntry = 0;
327 }
328 }
329
330 // Tell the InputController which event will be processed first
331 // (important if we want to do event mixing and skip some events in the input)
332 if (m_nextEntry > 0) {
334 }
335
336 // Processing everything so forward number of MC events
 // NOTE(review): the opening line of the enclosing condition is missing from this listing
338 Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
339 }
340 // And setup global tag replay ...
342}
343
344
// Body of the per-event step (presumably RootInputModule::event(); the signature
// line and several interior lines are absent from this listing).
//
// Does nothing when there is no event tree. Otherwise it keeps calling readTree()
// until an event with error flag 0 has been read; events with a non-zero error
// flag are only inspected when m_discardErrorEvents is set.
346{
347 if (!m_tree)
348 return;
349
350 while (true) {
 // NOTE(review): the declarations of `nextEntry` and `entry` and the enclosing conditions are missing from this listing
352 if (nextEntry >= 0 && nextEntry < InputController::numEntries(m_isSecondaryInput)) {
353 // don't show this message if we are doing event merging, as it will pop up twice for every event
355 B2INFO("RootInput: will read entry " << nextEntry << " next.");
356 }
357 m_nextEntry = nextEntry;
362 if (entry >= 0) {
 // `entry` is relative to the current file, so convert it to a chain-wide entry number
363 const long chainentry = m_tree->GetChainEntryNumber(entry);
364 B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
365 m_nextEntry = chainentry;
366 } else {
367 B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
368 InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
369 }
370 }
372
373 readTree();
374 m_nextEntry++;
375
376 // check for events with errors
377 unsigned int errorFlag = 0;
 // readTree() sets m_nextEntry to -2 at the end of the data, which the increment above
 // turns into -1; in that case no event was read and there is nothing to check
378 if (m_discardErrorEvents && (m_nextEntry >= 0)) {
379 const StoreObjPtr<EventMetaData> eventMetaData;
380 errorFlag = eventMetaData->getErrorFlag();
381 if (errorFlag != 0) {
 // warn only if at least one set flag is not covered by the silent-discard mask
382 if (errorFlag & ~m_discardErrorMask) {
383 B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
384 << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
385 }
386 // make sure this event is not used if it's the last one in the file
387 eventMetaData->setEndOfData();
388 }
389 }
 // a clean event (or discarding switched off, leaving errorFlag at 0) ends the loop
390 if (errorFlag == 0) break;
391 }
392}
393
394
// Body of the clean-up step (presumably RootInputModule::terminate(); the
// signature line and a few interior lines are absent from this listing).
//
// Adds the read statistics of the last event file (if requested), deletes both
// chains, accumulates statistics from the parent files and deletes them, logs the
// statistics and finally clears the bookkeeping containers.
396{
397 if (m_collectStatistics and m_tree) {
398 //add stats for last file
399 m_readStats.addFromFile(m_tree->GetFile());
400 }
401 delete m_tree;
402 delete m_persistent;
403 ReadStats parentReadStats;
404 for (const auto& entry : m_parentTrees) {
 // entry.second is the parent tree; f is the file it was read from
405 TFile* f = entry.second->GetCurrentFile();
 // NOTE(review): a line between these two statements is missing from this listing (possibly a condition guarding the next call)
407 parentReadStats.addFromFile(f);
408
409 delete f;
410 }
411
 // NOTE(review): the opening line of the enclosing condition is missing from this listing
413 B2INFO("Statistics for event tree: " << m_readStats.getString());
414 B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
415 }
416
417 for (auto& branch : m_connectedBranches) {
418 branch.clear();
419 }
420 m_storeEntries.clear();
422 m_parentStoreEntries.clear();
423 m_parentTrees.clear();
424}
425
426
// Body of the routine that loads entry m_nextEntry from the event chain
// (presumably RootInputModule::readTree(); the signature line and a few interior
// lines are absent from this listing).
//
// Steps visible here:
//  * translate m_nextEntry through the event list when entry sequences are used,
//  * TChain::LoadTree(); a return value of -2 is treated as end of data
//    (m_nextEntry is set to -2), any other negative value is fatal,
//  * reset all connected store entries and read the entry,
//  * when the tree number changed, read the persistent entry of the new file,
//  * publish the read objects via entry->ptr, read parent files if requested and
//    invalidate entries whose object carries the kInvalidObject bit.
428{
429 if (!m_tree)
430 return;
431
432 //keep snapshot of TFile stats (to use if it changes)
433 ReadStats currentEventStats;
 // NOTE(review): the opening line of the enclosing condition is missing from this listing
435 currentEventStats.addFromFile(m_tree->GetFile());
436 }
437
438 // Check if there are still new entries available.
439 int localEntryNumber = m_nextEntry;
440 if (m_entrySequences.size() > 0) {
 // with an event list in place m_nextEntry indexes the list, not the chain
441 localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
442 }
443 localEntryNumber = m_tree->LoadTree(localEntryNumber);
444
445 if (localEntryNumber == -2) {
446 m_nextEntry = -2;
447 return; //end of file
448 } else if (localEntryNumber < 0) {
449 B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
450 LogVar("error", localEntryNumber));
451 }
452 B2DEBUG(39, "Reading file entry " << m_nextEntry);
453
454 //Make sure transient members of objects are reinitialised
455 for (auto entry : m_storeEntries) {
456 entry->resetForGetEntry();
457 }
458 for (const auto& storeEntries : m_parentStoreEntries) {
459 for (auto entry : storeEntries) {
460 entry->resetForGetEntry();
461 }
462 }
463
 // localEntryNumber is now the value returned by LoadTree(), so read from the currently loaded tree
464 int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
465 if (bytesRead <= 0) {
466 B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
467 }
468
469 //In case someone is tempted to change this:
470 // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
471 // calling GetName() on the TFile almost works as expected, but starts with the
472 // last file in a TChain. (-> we re-read the first persistent tree with TChain,
473 // with ill results for Mergeable objects.)
474 // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
475 // so we'll save the last persistent tree loaded and only reload on changes.
 // NOTE(review): the declaration of `fileMetaData` (used below) is missing from this listing
477 const long treeNum = m_tree->GetTreeNumber();
478 const bool fileChanged = (m_lastPersistentEntry != treeNum);
479 if (fileChanged) {
 // NOTE(review): the opening line of the condition around the next statement is missing from this listing
481 m_readStats.add(currentEventStats);
482 }
483 // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
484 readPersistentEntry(treeNum);
485 B2INFO("Loading new input file"
486 << LogVar("filename", m_tree->GetFile()->GetName())
487 << LogVar("metadata LFN", fileMetaData->getLfn()));
488 }
489 realDataWorkaround(*fileMetaData);
490
491 for (auto entry : m_storeEntries) {
492 if (!entry->object) {
493 entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
494 entry->recoverFromNullObject();
495 entry->ptr = nullptr;
496 } else {
497 entry->ptr = entry->object;
498 }
499 }
500
501 if (m_parentLevel > 0) {
502 if (!readParentTrees())
503 B2FATAL("Could not read data from parent file!");
504 }
505
506 // Nooow, if the object didn't exist in the event when we wrote it to File we still have it in the file but it's marked as invalid Object.
507 // So go through everything and check for the bit and invalidate as necessary
 // NOTE(review): entry->object is dereferenced without a null check in both loops; presumably
 // recoverFromNullObject() above guarantees a valid object — confirm, also for the parent entries
508 for (auto entry : m_storeEntries) {
509 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
510 }
511 for (const auto& storeEntries : m_parentStoreEntries) {
512 for (auto entry : storeEntries) {
513 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
514 }
515 }
516}
517
/**
 * Connect the branches of a tree with entries in the DataStore.
 *
 * All branches are disabled first; a branch is then enabled and connected when
 * it passes filterBranches() for the given durability, or when it is
 * "FileMetaData" of m_persistent or "EventMetaData" of m_tree. Branches already
 * recorded in m_connectedBranches[durability] are skipped.
 *
 * @param tree         tree whose branches are connected
 * @param durability   durability under which the entries are registered
 * @param storeEntries if not null, the address of every newly connected entry is appended
 * @return always true in the visible code; failures are reported through B2FATAL
 *
 * NOTE(review): `map` is used below but its declaration is absent from this
 * listing; presumably it is the DataStore entry map for `durability` — confirm.
 */
518bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
519{
520 B2DEBUG(30, "File changed, loading persistent data.");
522 //Go over the branchlist and connect the branches with DataStore entries
523 const TObjArray* branchesObjArray = tree->GetListOfBranches();
524 if (!branchesObjArray) {
525 B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
526 }
527 std::vector<TBranch*> branches;
528 set<string> branchList;
529 for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
530 auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
531 if (!branch) continue;
532 branchList.insert(branch->GetName());
533 branches.emplace_back(branch);
534 // start with all branches disabled and only enable what we read
535 setBranchStatus(branch, false);
536 }
537 //skip branches the user doesn't want
538 branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
539 for (TBranch* branch : branches) {
540 const std::string branchName = branch->GetName();
541 //skip already connected branches
542 if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
543 continue;
544
 // a branch that was filtered out is still kept if it is the mandatory metadata branch of our own chain
545 if ((branchList.count(branchName) == 0) and
546 ((branchName != "FileMetaData") || (tree != m_persistent)) and
547 ((branchName != "EventMetaData") || (tree != m_tree))) {
548 continue;
549 }
550 auto found = setBranchStatus(branch, true);
551 B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
552 << LogVar("children found", found));
553
554 //Get information about the object in the branch
 // read the branch once into a temporary object just to find out its class
 // NOTE(review): objectPtr is dereferenced below without a null check — confirm GetEntry() cannot leave it null
555 TObject* objectPtr = nullptr;
556 branch->SetAddress(&objectPtr);
557 branch->GetEntry();
558 bool array = (string(branch->GetClassName()) == "TClonesArray");
559 TClass* objClass = nullptr;
560 if (array)
561 objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
562 else
563 objClass = objectPtr->IsA();
564 delete objectPtr;
565
566 //Create a DataStore entry and connect the branch address to it
567 if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
568 B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
569 continue;
570 }
571 DataStore::StoreEntry& entry = (map.find(branchName))->second;
572 tree->SetBranchAddress(branch->GetName(), &(entry.object));
573 if (storeEntries) storeEntries->push_back(&entry);
574
575 //Keep track of already connected branches
576 m_connectedBranches[durability].insert(branchName);
577 }
578
579 return true;
580}
581
582
// Body of the routine that opens the chain of parent files for the first event
// and connects their branches (presumably RootInputModule::createParentStoreEntries();
// the signature line and one interior line are absent from this listing).
//
// Returns false (after logging an error) when a parent file cannot be opened or
// does not contain the event or persistent tree; returns true otherwise.
584{
585 // get the experiment/run/event number and parentLfn of the first entry
586 assert(m_tree);
587 TBranch* branch = m_tree->GetBranch("EventMetaData");
 // remember the currently connected address so it can be restored after the temporary read
588 char* address = branch->GetAddress();
589 EventMetaData* eventMetaData = nullptr;
590 branch->SetAddress(&eventMetaData);
591 branch->GetEntry(0);
592 int experiment = eventMetaData->getExperiment();
593 int run = eventMetaData->getRun();
594 unsigned int event = eventMetaData->getEvent();
595 std::string parentLfn = eventMetaData->getParentLfn();
596 branch->SetAddress(address);
597
598 // loop over parents and get their metadata
599 for (int level = 0; level < m_parentLevel; level++) {
600 // open the parent file
 // remember the current ROOT directory and switch back to it after opening the file
601 TDirectory* dir = gDirectory;
602 const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
603 TFile* file = TFile::Open(parentPfn.c_str(), "READ");
604 dir->cd();
605 if (!file || !file->IsOpen()) {
606 B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
607 << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
608 return false;
609 }
610
611 // get the event tree and connect its branches
612 auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
613 if (!tree) {
614 B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
615 return false;
616 }
617 if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
 // NOTE(review): a line is missing from this listing here (presumably the call connecting the event tree's branches)
619 m_parentTrees.insert(std::make_pair(parentLfn, tree));
620
621 // get the persistent tree and read its branches
622 auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
623 if (!persistent) {
624 B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
625 return false;
626 }
627 connectBranches(persistent, DataStore::c_Persistent, nullptr);
628
629 // get parent LFN of parent
630 EventMetaData* metaData = nullptr;
631 tree->SetBranchAddress("EventMetaData", &metaData);
 // NOTE(review): `entry` is used without checking for a negative (not found) value — confirm this is safe
632 long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
633 tree->GetBranch("EventMetaData")->GetEntry(entry);
634 parentLfn = metaData->getParentLfn();
635 }
636
637 return true;
638}
639
640
// Body of the routine that reads, for the current event, the matching entry from
// each generation of parent files (presumably RootInputModule::readParentTrees();
// the signature line is absent from this listing).
//
// For every level up to m_parentLevel the parent file is opened on first use and
// cached in m_parentTrees, the entry with the same experiment/run/event is
// read and the parent store entries of that level are published via entry->ptr.
// Returns false (after logging an error) when a file, tree or event cannot be
// found, true otherwise.
642{
643 const StoreObjPtr<EventMetaData> eventMetaData;
644 int experiment = eventMetaData->getExperiment();
645 int run = eventMetaData->getRun();
646 unsigned int event = eventMetaData->getEvent();
647
648 std::string parentLfn = eventMetaData->getParentLfn();
649 for (int level = 0; level < m_parentLevel; level++) {
650 const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
651
652 // Open the parent file if we haven't done this already
653 TTree* tree = nullptr;
654 if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
 // remember the current ROOT directory and switch back to it after opening the file
655 TDirectory* dir = gDirectory;
656 B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
657 TFile* file = TFile::Open(parentPfn.c_str(), "READ");
658 dir->cd();
659 if (!file || !file->IsOpen()) {
660 B2ERROR("Couldn't open parent file " << parentPfn);
661 return false;
662 }
663 tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
664 if (!tree) {
665 B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
666 return false;
667 }
 // point the branches of the newly opened tree at the store entries of this level
668 for (auto entry : m_parentStoreEntries[level]) {
669 tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
670 }
671 m_parentTrees.insert(std::make_pair(parentLfn, tree));
672 } else {
673 tree = m_parentTrees[parentLfn];
674 }
675
676 // get entry number in parent tree
677 long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
678 if (entryNumber < 0) {
679 B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
680 return false;
681 }
682
683 // read the tree and mark the data read in the data store
684 EventMetaData* parentMetaData = nullptr;
685 tree->SetBranchAddress("EventMetaData", &parentMetaData);
686 tree->GetEntry(entryNumber);
687 for (auto entry : m_parentStoreEntries[level]) {
688 entry->ptr = entry->object;
689 }
690
691 // set the parent LFN to the next level
692 parentLfn = parentMetaData->getParentLfn();
693 }
694
 // NOTE(review): after the loop parentLfn holds the value read from the last parent's own
 // metadata, not the LFN of the file that was just read — confirm this is the intended argument
695 addEventListForIndexFile(parentLfn);
696
697 return true;
698}
699
700void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
701{
702 //is this really an index file? (=only EventMetaData stored)
703 if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
704 return;
705 //did we handle the current parent file already?
706 if (parentLfn == m_lastParentFileLFN)
707 return;
708 m_lastParentFileLFN = parentLfn;
709
710 B2INFO("Index file detected, scanning to generate event list.");
711 TTree* tree = m_parentTrees.at(parentLfn);
712
713 //both types of list work, TEventList seems to result in slightly less data being read.
714 auto* elist = new TEventList("parent_entrylist");
715 //TEntryListArray* elist = new TEntryListArray();
716
717 TBranch* branch = m_tree->GetBranch("EventMetaData");
718 auto* address = branch->GetAddress();
719 EventMetaData* eventMetaData = nullptr;
720 branch->SetAddress(&eventMetaData);
721 long nEntries = m_tree->GetEntries();
722 for (long i = m_nextEntry; i < nEntries; i++) {
723 branch->GetEntry(i);
724 int experiment = eventMetaData->getExperiment();
725 int run = eventMetaData->getRun();
726 unsigned int event = eventMetaData->getEvent();
727 const std::string& newParentLfn = eventMetaData->getParentLfn();
728
729 if (parentLfn != newParentLfn) {
730 //parent file changed, stopping for now
731 break;
732 }
733 long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
734 elist->Enter(entry);
735 }
736 branch->SetAddress(address);
737
738 if (tree) {
739 tree->SetEventList(elist);
740 //tree->SetEntryList(elist);
741 }
742}
743
744void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
745{
746 if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
747 B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
748 } else if (fileChanged) {
749 B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
750 " that was present in a previous entry.");
751 }
752}
753
// Body of the routine that reads one entry of the persistent chain
// (presumably RootInputModule::readPersistentEntry(fileEntry); the signature line
// is absent from this listing).
//
// Remembers fileEntry in m_lastPersistentEntry, reads that entry from
// m_persistent and publishes the objects through entry->ptr. For objects
// inheriting from Mergeable, a clone of the previous content is kept while
// reading and merged into the newly read object afterwards.
755{
756 m_lastPersistentEntry = fileEntry;
757
758 for (auto entry : m_persistentStoreEntries) {
 // NOTE(review): entry->object is dereferenced without a null check here, while the loop
 // after GetEntry() handles a null object — confirm it cannot be null at this point
759 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
760 TObject* copyOfPreviousVersion = nullptr;
761 if (isMergeable) {
762 copyOfPreviousVersion = entry->object->Clone();
763 }
764 entry->resetForGetEntry();
765 //ptr stores old value (or nullptr)
766 entry->ptr = copyOfPreviousVersion;
767 }
768
769 int bytesRead = m_persistent->GetEntry(fileEntry);
770 if (bytesRead <= 0) {
771 const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
772 B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
773 }
774
775 for (auto entry : m_persistentStoreEntries) {
776 if (entry->object) {
777 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
778 if (isMergeable) {
 // entry->ptr still holds the clone made before reading; fold it into the new object, then drop it
779 const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
780 auto* newObj = static_cast<Mergeable*>(entry->object);
781 newObj->merge(oldObj);
782
783 delete entry->ptr;
784 }
785 entry->ptr = entry->object;
786 } else {
 // NOTE(review): if the object is null after reading, a clone stored in entry->ptr above is
 // overwritten below without being deleted — confirm whether this can happen
787 entryNotFound("Persistent tree", entry->name);
788 entry->recoverFromNullObject();
789 entry->ptr = nullptr;
790 }
791 }
792}
793
// Body of the metadata fix-up applied to each FileMetaData object
// (presumably RootInputModule::realDataWorkaround(metaData); the signature line
// is absent from this listing).
//
// Calls metaData.declareRealData() when all of the following hold: the site
// string starts with "bfe0", the date string compares less than "2019-06-30",
// the lowest experiment is > 0, the highest experiment is < 9 and the lowest
// run is > 0. The date check is a plain string comparison.
795{
796 if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
797 (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
798 metaData.declareRealData();
799 }
800}
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
Definition: DataStore.h:70
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
Definition: DataStore.h:325
EDurability
Durability types.
Definition: DataStore.h:58
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:54
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
Definition: DataStore.cc:190
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:87
@ c_Input
required input.
Definition: DependencyMap.h:33
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
Definition: Environment.h:89
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g.
Definition: Environment.h:73
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
Definition: Environment.h:67
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
Definition: Environment.h:109
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
int getExperiment() const
Experiment Getter.
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
Definition: FileCatalog.cc:23
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Definition: FileCatalog.cc:180
Metadata information about a file.
Definition: FileMetaData.h:29
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:53
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:294
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:95
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:99
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:49
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:61
unsigned int getMcEvents() const
Number of generated events getter.
Definition: FileMetaData.h:123
static long getNextRun()
Return run number set via setNextEntry().
static void setNextEntry(long entry, bool independentPath=false)
Set the file entry to be loaded the next time event() is called.
static long getNextExperiment()
Return experiment number set via setNextEntry().
static bool getEventMerging()
Get if we are merging events from two paths.
static void eventLoaded(long entry, bool independentPath=false)
Indicate that an event (in the given entry) was loaded and reset all members related to the next entr...
static long numEntries(bool independentPath=false)
Returns total number of entries in the event tree.
static long getNextEntry(bool independentPath=false)
Return entry number set via setNextEntry().
static void setSkippedEntries(long entries, bool independentPath=false)
set the number of entries skipped by the RootInputModule.
static void setChain(const TChain *chain, bool independentPath=false)
Set the loaded TChain (event durability).
static void setCanControlInput(bool on)
Call this function from supported input modules.
static long getNextEvent()
Return event number set via setNextEntry().
Abstract base class for objects that can be merged.
Definition: Mergeable.h:31
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_Input
This module is an input module (reads data).
Definition: Module.h:78
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:27
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
Definition: RootFileInfo.cc:41
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:75
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:55
bool m_discardErrorEvents
Discard events that have an error flag != 0.
StoreEntries m_persistentStoreEntries
Vector of DataStore entries of persistent durability that we are supposed to read in.
void realDataWorkaround(FileMetaData &metaData)
Correct isMC flag for raw data recorded before experiment 8 run 2364.
std::map< std::string, TTree * > m_parentTrees
Map of file LFNs to trees.
unsigned int m_discardErrorMask
Don't issue a warning when discarding events if the error flag consists exclusively of flags in this ...
bool m_isSecondaryInput
When using a second RootInputModule in an independent path [usually if you are using add_independent_...
bool m_collectStatistics
Collect statistics on amount of data read and print statistics (separate for input & parent files) af...
long m_lastPersistentEntry
last entry to be in persistent tree.
StoreEntries m_storeEntries
Vector of DataStore entries of event durability that we are supposed to read in.
virtual void initialize() override
Initialize the Module.
TChain * m_tree
TTree for event input.
virtual void event() override
Running over all events.
virtual std::vector< std::string > getFileNames(bool outputFiles=false) override
Get list of input files, taking -i command line overrides into account.
int m_parentLevel
Level of parent files to be read.
std::vector< StoreEntries > m_parentStoreEntries
The parent DataStore entries per level.
virtual void terminate() override
Is called at the end of your Module.
std::string m_inputFileName
File to read from.
TChain * m_persistent
TTree for persistent input.
std::string m_lastParentFileLFN
last parent file LFN seen.
std::vector< std::string > m_inputFileNames
Files to read from.
void readPersistentEntry(long fileEntry)
Loads given entry from persistent tree.
virtual ~RootInputModule()
Destructor.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_cacheSize
Input ROOT File Cache size in MB, <0 means default.
long m_nextEntry
Next entry to be read in event tree.
std::set< std::string > m_connectedBranches[DataStore::c_NDurabilityTypes]
Already connected branches.
void entryNotFound(const std::string &entryOrigin, const std::string &name, bool fileChanged=true)
Check if we warn the user or abort after an entry was missing after changing files.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
bool createParentStoreEntries()
Connect the parent trees and fill m_parentStoreEntries.
void addEventListForIndexFile(const std::string &parentLfn)
For index files, this creates TEventList/TEntryListArray to enable better cache use.
RootInputModule()
Constructor.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
bool m_processingAllEvents
Set to true if we process the input files completely: No skip events or sequences or -n parameters.
std::vector< int > m_skipToEvent
experiment, run, event number of first event to load
ReadStats m_readStats
some statistics for all files read so far.
unsigned int m_skipNEvents
Can be set from steering file to skip the first N events.
void readTree()
Actually performs the reading from the tree.
bool readParentTrees()
Read data of the current event from the parents.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches, that shall be written out.
std::vector< std::string > m_entrySequences
The number sequences (e.g.
bool connectBranches(TTree *tree, DataStore::EDurability durability, StoreEntries *storeEntries)
Connect branches of the given tree to the data store.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Definition: ScopeGuard.h:76
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
return entry number with given (event, run, experiment) from tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
STL namespace.
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
void add(const ReadStats &b)
add other stats object.
void addFromFile(const TFile *f)
add current statistics from TFile object.
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:22
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:48