12 #include <framework/modules/rootio/RootInputModule.h>
14 #include <framework/io/RootIOUtilities.h>
15 #include <framework/io/RootFileInfo.h>
16 #include <framework/core/FileCatalog.h>
17 #include <framework/core/InputController.h>
18 #include <framework/pcore/Mergeable.h>
19 #include <framework/datastore/StoreObjPtr.h>
20 #include <framework/datastore/DataStore.h>
21 #include <framework/datastore/DependencyMap.h>
22 #include <framework/dataobjects/EventMetaData.h>
23 #include <framework/utilities/NumberSequence.h>
24 #include <framework/utilities/ScopeGuard.h>
25 #include <framework/database/Configuration.h>
27 #include <TClonesArray.h>
28 #include <TEventList.h>
29 #include <TObjArray.h>
30 #include <TChainElement.h>
37 using namespace RootIOUtilities;
// NOTE(review): the header of the enclosing definition is not visible in this
// view. These statements set the module description and declare the module
// parameters, so this is presumably the RootInputModule constructor - confirm.
44 setDescription(
"Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
// Shared empty default value for the list-valued parameters declared below.
48 vector<string> emptyvector;
49 addParam(
"inputFileName", m_inputFileName,
50 "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
52 addParam(
"inputFileNames", m_inputFileNames,
53 "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
// NOTE(review): the three adjacent string literals of the description below are
// concatenated without separating spaces, so the resulting help text runs the
// sentences together (inputFileName.Must ... opened.The first) - consider adding spaces.
55 addParam(
"entrySequences", m_entrySequences,
56 "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
57 "Must be specified exactly once for each file to be opened."
58 "The first event has the entry number 0.", emptyvector);
59 addParam(
"ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
60 "Ignore override of file name via command line argument -i.",
false);
62 addParam(
"skipNEvents", m_skipNEvents,
"Skip this number of events before starting.", 0u);
// The default passed here is the current value of m_skipToEvent itself.
63 addParam(
"skipToEvent", m_skipToEvent,
"Skip events until the event with "
64 "the specified (experiment, run, event number) occurs. This parameter "
65 "is useful for debugging to start with a specific event.", m_skipToEvent);
// The following four descriptions belong to the branch include/exclude list
// parameters; the lines naming the parameters are not visible in this view.
68 "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
70 "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
73 "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
// ProcessStatistics is the default entry of the persistent exclude list.
74 vector<string> excludePersistent({
"ProcessStatistics"});
76 "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
78 addParam(
"parentLevel", m_parentLevel,
79 "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
// NOTE(review): the help text below spells separate as seperate; it is a
// runtime string and is left unchanged here.
82 addParam(
"collectStatistics" , m_collectStatistics,
83 "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
85 addParam(
"cacheSize", m_cacheSize,
86 "file cache size in Mbytes. If negative, use root default", 0);
// Events with a non-zero error flag are discarded by default.
88 addParam(
"discardErrorEvents", m_discardErrorEvents,
89 "Discard events with an error flag != 0",
true);
// Destructor: defaulted, i.e. no hand-written cleanup is performed here.
92 RootInputModule::~RootInputModule() =
default;
// Prepare the module for reading.
//
// Steps visible in this block:
//  - apply the skip-events and entry-sequence overrides from Environment,
//  - validate the file name and entry sequence parameters (fatal on error),
//  - create the persistent and event TChains and add every input file to both,
//  - optionally turn the entry sequences into a TEventList on the event chain,
//  - connect the branches of both chains and read persistent entry 0,
//  - hand the event chain to InputController and set up parent files,
//  - evaluate skipToEvent and decide whether all events are processed.
//
// NOTE(review): several original lines (braces, the try statement, the
// declaration of meta, ...) are missing from this view.
94 void RootInputModule::initialize()
// Non-zero / non-empty values from Environment replace the module parameters.
96 unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
97 if (skipNEventsOverride != 0)
98 m_skipNEvents = skipNEventsOverride;
100 auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
101 if (entrySequencesOverride.size() > 0)
102 m_entrySequences = entrySequencesOverride;
// Reading starts at the first entry that is not skipped.
104 m_nextEntry = m_skipNEvents;
105 m_lastPersistentEntry = -1;
106 m_lastParentFileLFN =
"";
// Parameter validation: every inconsistency below is reported with B2FATAL.
108 const vector<string>& inputFiles = getFileNames();
109 if (inputFiles.empty()) {
110 B2FATAL(
"You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
112 if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
113 B2FATAL(
"Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
116 if (m_inputFileNames.empty()) {
117 B2FATAL(
"No valid files specified!");
// If entry sequences are given there must be exactly one per input file.
120 if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
121 B2FATAL(
"Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
122 << m_inputFileNames.size() <<
" len(entrySequences) = " << m_entrySequences.size());
125 m_inputFileName =
"";
// One chain per durability, named after the corresponding entry of c_treeNames.
129 m_persistent =
new TChain(
c_treeNames[DataStore::c_Persistent].c_str());
130 m_tree =
new TChain(
c_treeNames[DataStore::c_Event].c_str());
143 std::set<std::string> requiredEventBranches;
144 std::set<std::string> requiredPersistentBranches;
// Metadata of all accepted files; passed to the conditions configuration at the end.
146 std::vector<FileMetaData> fileMetaData;
// Running sum of MC events, typed like the return value of FileMetaData::getMcEvents().
148 std::result_of<decltype(&FileMetaData::getMcEvents)(
FileMetaData)>::type sumInputMCEvents{0};
// Sets gErrorIgnoreLevel to kWarning + 1 through a ScopeGuard; presumably the
// previous value is restored when the guard is destroyed - confirm in ScopeGuard.
153 auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
// Becomes false once a file without MC events follows files that had some.
155 bool validInputMCEvents{
true};
156 for (
const string& fileName : m_inputFileNames) {
// Files reporting zero events only produce a warning (the message says they are skipped).
161 if (meta.getNEvents() == 0) {
162 B2WARNING(
"File appears to be empty, skipping" <<
LogVar(
"filename", fileName));
165 realDataWorkaround(meta);
166 fileMetaData.push_back(meta);
// Every file has to agree with the first accepted file on being MC or real data.
168 if (fileMetaData.front().isMC() != meta.isMC()) {
169 throw std::runtime_error(
"Mixing real data and simulated data for input files is not supported");
172 if (validInputMCEvents) {
174 if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
175 B2WARNING(
"inconsistent input files: zero mcEvents, setting total number of MC events to zero" <<
LogVar(
"filename", fileName));
176 validInputMCEvents =
false;
// Overflow-checked accumulation of the MC event count.
179 if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
180 B2FATAL(
"Number of MC events is too large and cannot be represented anymore");
// The required branch sets are filled only while they are still empty.
184 if (requiredEventBranches.empty()) {
194 requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
195 m_excludeBranchNames[DataStore::c_Event], DataStore::c_Event);
// EventMetaData is always added to the required event branches.
197 requiredEventBranches.emplace(
"EventMetaData");
202 requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
203 m_excludeBranchNames[DataStore::c_Persistent], DataStore::c_Persistent);
// Add the file to both chains; a return value of 0 from AddFile counts as failure.
// The persistent chain is told to expect one entry per file.
210 if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
211 throw std::runtime_error(
"Could not add file to TChain");
213 B2INFO(
"Added file " + fileName);
214 }
// Any exception raised while handling a file is turned into a fatal error.
catch (std::exception& e) {
215 B2FATAL(
"Could not open input file " << std::quoted(fileName) <<
": " << e.what());
220 if (m_tree->GetNtrees() == 0) B2FATAL(
"No file could be opened, aborting");
// m_cacheSize is converted to bytes (factor 1024 * 1024); negative values leave the cache untouched.
223 if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
// Detect files that were added to the chain more than once.
230 std::set<std::string> unique_filenames;
234 TObjArray* fileElements = m_tree->GetListOfFiles();
235 TIter next(fileElements);
236 TChainElement* chEl =
nullptr;
237 while ((chEl = (TChainElement*)next())) {
238 if (!unique_filenames.insert(chEl->GetTitle()).second) {
239 B2WARNING(
"The input file '" << chEl->GetTitle() <<
"' was specified more than once");
241 m_processingAllEvents =
false;
// Entry sequences are matched to files by index, so duplicated files are fatal in that mode.
244 if ((
unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
245 B2FATAL(
"You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
246 "Please specify each file only once if you're using the sequence feature!");
// Translate the per-file entry sequences into a single TEventList holding
// chain-global entry numbers (per-file entry + tree offset).
250 if (m_entrySequences.size() > 0) {
251 auto* elist =
new TEventList(
"input_event_list");
252 for (
unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
253 int64_t offset = m_tree->GetTreeOffset()[iFile];
254 int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
// A sequence consisting only of a colon selects every entry of the file.
256 if (m_entrySequences[iFile] ==
":") {
257 for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
258 elist->Enter(global_entry);
261 int64_t global_entry = entry + offset;
// Entries at or beyond the start of the next file are out of range for this file.
262 if (global_entry >= next_offset) {
263 B2WARNING(
"Given sequence contains entry numbers which are out of range. "
264 "I won't add any further events to the EventList for the current file.");
267 elist->Enter(global_entry);
272 m_tree->SetEventList(elist);
275 B2DEBUG(33,
"Opened tree '" +
c_treeNames[DataStore::c_Persistent] +
"'" <<
LogVar(
"entries", m_persistent->GetEntriesFast()));
276 B2DEBUG(33,
"Opened tree '" +
c_treeNames[DataStore::c_Event] +
"'" <<
LogVar(
"entries", m_tree->GetEntriesFast()));
// Persistent branches are connected first and persistent entry 0 is read
// before the event branches are connected.
278 connectBranches(m_persistent, DataStore::c_Persistent, &m_persistentStoreEntries);
279 readPersistentEntry(0);
281 if (!connectBranches(m_tree, DataStore::c_Event, &m_storeEntries)) {
// Register the event chain with InputController.
285 InputController::setCanControlInput(
true);
286 InputController::setChain(m_tree);
// Parent files are only set up for a positive parentLevel; negative values are an error.
289 if (m_parentLevel > 0) {
290 createParentStoreEntries();
291 }
else if (m_parentLevel < 0) {
292 B2ERROR(
"parentLevel must be >= 0!");
// m_processingAllEvents stays true only if no skipping, no entry sequences and
// no smaller event limit restrict the set of entries.
300 unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
301 m_processingAllEvents &= m_skipNEvents == 0 && m_entrySequences.size() == 0;
302 m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries());
304 if (!m_skipToEvent.empty()) {
306 m_processingAllEvents =
false;
// skipToEvent needs exactly three values; an invalid list is reported and cleared.
308 if (m_skipToEvent.size() != 3) {
309 B2ERROR(
"skipToEvent must be a list of three values: experiment, run, event number");
311 m_skipToEvent.clear();
313 InputController::setNextEntry(m_skipToEvent[0], m_skipToEvent[1], m_skipToEvent[2]);
// skipNEvents and skipToEvent are mutually exclusive; the message states skipNEvents is ignored.
315 if (m_nextEntry > 0) {
316 B2ERROR(
"You cannot supply a number of events to skip (skipNEvents) and an "
317 "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
// The summed MC event count is only published when all events are processed.
324 if (m_processingAllEvents) {
325 Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
328 Conditions::Configuration::getInstance().setInputMetadata(fileMetaData);
// Per-event entry point: determine which entry is loaded next, honouring a
// request made through InputController either by entry number or by
// (experiment, run, event), then check the error flag of the event.
//
// NOTE(review): the loop header and the call that actually reads the entry
// are not visible in this view.
332 void RootInputModule::event()
// Case 1: a valid entry number was requested directly.
338 const long nextEntry = InputController::getNextEntry();
339 if (nextEntry >= 0 && nextEntry < InputController::numEntries()) {
340 B2INFO(
"RootInput: will read entry " << nextEntry <<
" next.");
341 m_nextEntry = nextEntry;
342 }
// Case 2: a specific (experiment, run, event) was requested. The lookup is done
// in the currently loaded tree and the result is converted to a chain entry.
else if (InputController::getNextExperiment() >= 0 && InputController::getNextRun() >= 0
343 && InputController::getNextEvent() >= 0) {
344 const long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(m_tree->GetTree(), InputController::getNextEvent(),
345 InputController::getNextRun(), InputController::getNextExperiment());
347 const long chainentry = m_tree->GetChainEntryNumber(entry);
348 B2INFO(
"RootInput: will read entry " << chainentry <<
" (entry " << entry <<
" in current file) next.");
349 m_nextEntry = chainentry;
// Presumably the branch taken when the lookup failed (condition not visible):
// m_nextEntry is left unchanged and reported as the fallback.
351 B2ERROR(
"Couldn't find entry (" << InputController::getNextEvent() <<
", " << InputController::getNextRun() <<
", " <<
352 InputController::getNextExperiment() <<
") in file! Loading entry " << m_nextEntry <<
" instead.");
// Tell InputController which entry is being loaded.
355 InputController::eventLoaded(m_nextEntry);
// The error flag is only inspected when discardErrorEvents is set and
// m_nextEntry is not negative; otherwise it stays 0.
361 unsigned int errorFlag = 0;
362 if (m_discardErrorEvents && (m_nextEntry >= 0)) {
364 errorFlag = eventMetaData->getErrorFlag();
365 if (errorFlag != 0) {
366 B2WARNING(
"Discarding corrupted event" <<
LogVar(
"errorFlag", errorFlag) <<
LogVar(
"experiment", eventMetaData->getExperiment())
367 <<
LogVar(
"run", eventMetaData->getRun()) <<
LogVar(
"event", eventMetaData->getEvent()));
// Leave the enclosing loop as soon as an event without error flag was found.
370 if (errorFlag == 0)
break;
// End-of-job handling: when collectStatistics is set, gather the read
// statistics of the event chain and of the parent trees and print them, then
// clear the store entry lists and the parent tree map.
//
// NOTE(review): the bodies of several statements are not visible in this view.
375 void RootInputModule::terminate()
377 if (m_collectStatistics and m_tree) {
// Add the statistics of the file returned by m_tree->GetFile().
379 m_readStats.addFromFile(m_tree->GetFile());
// Visit the file currently attached to every cached parent tree.
384 for (
const auto& entry : m_parentTrees) {
385 TFile* f = entry.second->GetCurrentFile();
386 if (m_collectStatistics)
392 if (m_collectStatistics) {
393 B2INFO(
"Statistics for event tree: " << m_readStats.getString());
394 B2INFO(
"Statistics for event tree (parent files): " << parentReadStats.
getString());
397 for (
auto& branch : m_connectedBranches) {
// Drop all bookkeeping containers.
400 m_storeEntries.clear();
401 m_persistentStoreEntries.clear();
402 m_parentStoreEntries.clear();
403 m_parentTrees.clear();
// Read the entry m_nextEntry from the event chain.
//
// The chain entry is mapped to a tree-local entry (through the event list when
// entry sequences are used, then through LoadTree), the store entries are
// reset, the entry is read, persistent data is re-read for the current tree
// number, the read objects are published through the store entries and, for
// parentLevel > 0, the parent trees are read as well.
//
// NOTE(review): several original lines (braces, conditions, returns) are
// missing from this view.
407 void RootInputModule::readTree()
414 if (m_collectStatistics) {
// NOTE(review): the entry number is held in an int although the ROOT calls
// below return 64-bit values; confirm that truncation cannot occur for very
// large chains.
419 int localEntryNumber = m_nextEntry;
// With entry sequences the position is first translated through the event list.
420 if (m_entrySequences.size() > 0) {
421 localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
// LoadTree switches to the tree containing the entry and yields the tree-local entry number.
423 localEntryNumber = m_tree->LoadTree(localEntryNumber);
// -2 is handled separately from every other negative value, which is fatal.
425 if (localEntryNumber == -2) {
428 }
else if (localEntryNumber < 0) {
429 B2FATAL(
"Failed to load tree, corrupt file? Check standard error for additional messages. (TChain::LoadTree() returned error " <<
430 localEntryNumber <<
")");
432 B2DEBUG(39,
"Reading file entry " << m_nextEntry);
// Reset all event-level store entries (own and parent) before reading.
435 for (
auto entry : m_storeEntries) {
436 entry->resetForGetEntry();
438 for (
const auto& storeEntries : m_parentStoreEntries) {
439 for (
auto entry : storeEntries) {
440 entry->resetForGetEntry();
// A result of zero or less from GetEntry is treated as a fatal read error.
444 int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
445 if (bytesRead <= 0) {
446 B2FATAL(
"Could not read 'tree' entry " << m_nextEntry <<
" in file " << m_tree->GetCurrentFile()->GetName());
// A new file is detected by comparing the tree number with the last persistent entry read.
457 const long treeNum = m_tree->GetTreeNumber();
458 const bool fileChanged = (m_lastPersistentEntry != treeNum);
460 if (m_collectStatistics) {
461 m_readStats.add(currentEventStats);
// The persistent entry index equals the tree number (one persistent entry per file).
464 readPersistentEntry(treeNum);
465 B2INFO(
"Loading new input file"
466 <<
LogVar(
"filename", m_tree->GetFile()->GetName())
467 <<
LogVar(
"metadata LFN", fileMetaData->getLfn()));
469 realDataWorkaround(*fileMetaData);
// Publish what was read: entries without an object are reported, restored via
// recoverFromNullObject() and exposed as a null pointer; the others expose their object.
471 for (
auto entry : m_storeEntries) {
472 if (!entry->object) {
473 entryNotFound(
"Event durability tree (global entry: " + std::to_string(m_nextEntry) +
")", entry->name, fileChanged);
474 entry->recoverFromNullObject();
475 entry->ptr =
nullptr;
477 entry->ptr = entry->object;
// Failing to read the parent trees is fatal.
481 if (m_parentLevel > 0) {
482 if (!readParentTrees())
483 B2FATAL(
"Could not read data from parent file!");
// NOTE(review): the header of the enclosing definition is not visible in this
// view. Judging by the locals tree, durability and storeEntries and by the
// call sites elsewhere in the file this is presumably the body of
// connectBranches(tree, durability, storeEntries) - confirm.
//
// Visible behaviour: collect the branches of the tree, filter them with the
// configured include and exclude lists, register a DataStore entry for each
// selected branch, bind the branch to that entry and remember the branch name
// in m_connectedBranches.
490 B2DEBUG(30,
"File changed, loading persistent data.");
493 const TObjArray* branchesObjArray = tree->GetListOfBranches();
494 if (!branchesObjArray) {
495 B2FATAL(
"Tree '" << tree->GetName() <<
"' doesn't contain any branches!");
// Collect branch pointers and names; null slots in the array are ignored.
497 std::vector<TBranch*> branches;
498 set<string> branchList;
499 for (
int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
500 auto* branch =
static_cast<TBranch*
>(branchesObjArray->At(jj));
501 if (!branch)
continue;
502 branchList.insert(branch->GetName());
503 branches.emplace_back(branch);
// Reduce the name list to the branches selected by the module parameters.
508 branchList =
filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability,
true);
509 for (TBranch* branch : branches) {
510 const std::string branchName = branch->GetName();
// Check whether this branch was already connected for this durability.
512 if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
// Branches outside the filtered list match this condition, except
// FileMetaData in the persistent chain and EventMetaData in the event chain,
// which never match it.
515 if ((branchList.count(branchName) == 0) and
516 ((branchName !=
"FileMetaData") || (tree != m_persistent)) and
517 ((branchName !=
"EventMetaData") || (tree != m_tree))) {
521 B2DEBUG(32,
"Enabling branch" <<
LogVar(
"branchName", branchName)
522 <<
LogVar(
"children found", found));
// Point the branch at a temporary object pointer so the class of the stored
// object can be determined below.
525 TObject* objectPtr =
nullptr;
526 branch->SetAddress(&objectPtr);
// For TClonesArray branches the element class is used, otherwise the class of the object itself.
528 bool array = (string(branch->GetClassName()) ==
"TClonesArray");
529 TClass* objClass =
nullptr;
531 objClass = (
static_cast<TClonesArray*
>(objectPtr))->GetClass();
533 objClass = objectPtr->IsA();
// Registration failure is fatal.
537 if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
538 B2FATAL(
"Cannot connect branch to datastore" <<
LogVar(
"branchName", branchName));
// Bind the branch to the object pointer of the store entry and, if a list was
// supplied, record the entry in it.
542 tree->SetBranchAddress(branch->GetName(), &(entry.
object));
543 if (storeEntries) storeEntries->push_back(&entry);
546 m_connectedBranches[durability].insert(branchName);
// Open the parent files for up to m_parentLevel generations, connect the
// branches of their event and persistent trees and cache the event trees in
// m_parentTrees, keyed by the logical file name (LFN).
//
// NOTE(review): several original lines (declarations, conditions, returns)
// are missing from this view, so the exact return values cannot be stated here.
553 bool RootInputModule::createParentStoreEntries()
// Temporarily redirect the EventMetaData branch to a local object, read the
// identifiers from it and restore the original address afterwards.
556 TBranch* branch = m_tree->GetBranch(
"EventMetaData");
557 char* address = branch->GetAddress();
559 branch->SetAddress(&eventMetaData);
562 int run = eventMetaData->
getRun();
563 unsigned int event = eventMetaData->
getEvent();
565 branch->SetAddress(address);
// One iteration per parent generation.
568 for (
int level = 0; level < m_parentLevel; level++) {
// The LFN is resolved to a physical file name through the FileCatalog.
570 TDirectory* dir = gDirectory;
571 const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
572 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
574 if (!file || !file->IsOpen()) {
575 B2ERROR(
"Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
576 <<
LogVar(
"LFN", parentLfn) <<
LogVar(
"PFN", parentPfn));
// Event tree of the parent file.
581 auto* tree =
dynamic_cast<TTree*
>(file->Get(
c_treeNames[DataStore::c_Event].c_str()));
583 B2ERROR(
"No tree " <<
c_treeNames[DataStore::c_Event] <<
" found in " << parentPfn);
// Make sure a store entry list exists for this level, then connect and cache the tree.
586 if (
int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
587 connectBranches(tree, DataStore::c_Event, &m_parentStoreEntries[level]);
588 m_parentTrees.insert(std::make_pair(parentLfn, tree));
// Persistent tree of the parent file; its entries are not collected (null list).
591 auto* persistent =
dynamic_cast<TTree*
>(file->Get(
c_treeNames[DataStore::c_Persistent].c_str()));
593 B2ERROR(
"No tree " <<
c_treeNames[DataStore::c_Persistent] <<
" found in " << parentPfn);
596 connectBranches(persistent, DataStore::c_Persistent,
nullptr);
// Read the EventMetaData of the matching event in the parent, presumably to
// obtain the LFN of the next generation - confirm against the missing lines.
600 tree->SetBranchAddress(
"EventMetaData", &metaData);
601 long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
602 tree->GetBranch(
"EventMetaData")->GetEntry(entry);
// For the current event, read the matching entry from the parent tree of each
// generation (up to m_parentLevel). Parent files that were not seen before are
// opened and their event tree is cached in m_parentTrees.
//
// NOTE(review): several original lines (conditions, returns, braces) are
// missing from this view, so the exact return values cannot be stated here.
610 bool RootInputModule::readParentTrees()
// Identifiers of the current event, used to locate it in the parent trees.
613 int experiment = eventMetaData->getExperiment();
614 int run = eventMetaData->getRun();
615 unsigned int event = eventMetaData->getEvent();
617 std::string parentLfn = eventMetaData->getParentLfn();
618 for (
int level = 0; level < m_parentLevel; level++) {
619 const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
622 TTree* tree =
nullptr;
// First time this LFN is seen: open the file and fetch its event tree.
623 if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
624 TDirectory* dir = gDirectory;
625 B2DEBUG(30,
"Opening parent file" <<
LogVar(
"PFN", parentPfn));
626 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
628 if (!file || !file->IsOpen()) {
629 B2ERROR(
"Couldn't open parent file " << parentPfn);
632 tree =
dynamic_cast<TTree*
>(file->Get(
c_treeNames[DataStore::c_Event].c_str()));
634 B2ERROR(
"No tree " <<
c_treeNames[DataStore::c_Event] <<
" found in " << parentPfn);
// Bind the branches of the new tree to the store entries of this level and cache the tree.
637 for (
auto entry : m_parentStoreEntries[level]) {
638 tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
640 m_parentTrees.insert(std::make_pair(parentLfn, tree));
// Otherwise reuse the cached tree.
642 tree = m_parentTrees[parentLfn];
// Locate the event in the parent tree; a negative entry number means it was not found.
646 long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
647 if (entryNumber < 0) {
648 B2ERROR(
"No event " << experiment <<
"/" << run <<
"/" << event <<
" in parent file " << parentPfn);
// Read the entry and publish the objects of this level.
654 tree->SetBranchAddress(
"EventMetaData", &parentMetaData);
655 tree->GetEntry(entryNumber);
656 for (
auto entry : m_parentStoreEntries[level]) {
657 entry->ptr = entry->object;
664 addEventListForIndexFile(parentLfn);
// Build a TEventList for the parent tree registered under parentLfn by scanning
// the EventMetaData of the event chain from m_nextEntry onwards and looking up
// each event in the parent tree; the list is then set on the parent tree.
//
// parentLfn: logical file name of the parent; must be a key of m_parentTrees
//            (m_parentTrees.at() is used).
//
// NOTE(review): several original lines (early exits, the Enter call, braces)
// are missing from this view.
669 void RootInputModule::addEventListForIndexFile(
const std::string& parentLfn)
// Guard: the code below only applies when parents are read and exactly one
// event store entry is connected (presumably an early return otherwise).
672 if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
// Guard: same parent as on the previous call (presumably nothing to redo).
675 if (parentLfn == m_lastParentFileLFN)
677 m_lastParentFileLFN = parentLfn;
679 B2INFO(
"Index file detected, scanning to generate event list.");
680 TTree* tree = m_parentTrees.at(parentLfn);
// NOTE(review): allocated with a raw new; who owns the list after
// SetEventList() should be confirmed against the ROOT documentation.
683 auto* elist =
new TEventList(
"parent_entrylist");
// Temporarily redirect the EventMetaData branch to a local object for the scan;
// the original address is restored afterwards.
686 TBranch* branch = m_tree->GetBranch(
"EventMetaData");
687 auto* address = branch->GetAddress();
689 branch->SetAddress(&eventMetaData);
690 long nEntries = m_tree->GetEntries();
691 for (
long i = m_nextEntry; i < nEntries; i++) {
694 int run = eventMetaData->
getRun();
695 unsigned int event = eventMetaData->
getEvent();
696 const std::string& newParentLfn = eventMetaData->
getParentLfn();
// Check whether this event refers to a different parent file.
698 if (parentLfn != newParentLfn) {
702 long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
705 branch->SetAddress(address);
708 tree->SetEventList(elist);
// Report a store entry for which the input delivered no object.
//
// entryOrigin: text describing where the entry was expected; used as the
//              start of the log message.
// name:        name of the missing object.
// fileChanged: whether a new input file was just entered.
//
// The case is fatal when the object is ProcessStatistics or when the
// DependencyMap of the DataStore lists it as an input; otherwise a warning is
// issued, and only if fileChanged is true.
713 void RootInputModule::entryNotFound(
const std::string& entryOrigin,
const std::string& name,
bool fileChanged)
715 if (name ==
"ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
716 B2FATAL(entryOrigin <<
" in " << m_tree->GetFile()->GetName() <<
" does not contain required object " << name <<
", aborting.");
717 }
else if (fileChanged) {
718 B2WARNING(entryOrigin <<
" in " << m_tree->GetFile()->GetName() <<
" does not contain object " << name <<
719 " that was present in a previous entry.");
// Read entry fileEntry of the persistent chain and publish the objects through
// the persistent store entries. Objects inheriting from Mergeable are cloned
// before the read and merged with that copy afterwards.
//
// fileEntry: entry number in the persistent chain; also stored in
//            m_lastPersistentEntry.
//
// NOTE(review): several original lines (conditions, braces, the declaration of
// oldObj) are missing from this view.
723 void RootInputModule::readPersistentEntry(
long fileEntry)
725 m_lastPersistentEntry = fileEntry;
// Before reading: keep a clone of the previous version where needed and reset the entries.
727 for (
auto entry : m_persistentStoreEntries) {
728 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
729 TObject* copyOfPreviousVersion =
nullptr;
731 copyOfPreviousVersion = entry->object->Clone();
733 entry->resetForGetEntry();
// The clone is parked in entry->ptr until the merge below.
735 entry->ptr = copyOfPreviousVersion;
// A result of zero or less from GetEntry is treated as a fatal read error.
// NOTE(review): the file name in the message is taken from m_tree although
// the read is done on m_persistent - confirm that this is intended.
738 int bytesRead = m_persistent->GetEntry(fileEntry);
739 if (bytesRead <= 0) {
740 const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() :
"<unknown>";
741 B2FATAL(
"Could not read 'persistent' TTree #" << fileEntry <<
" in file " << name);
// After reading: merge mergeable objects with their previous version and publish the result.
744 for (
auto entry : m_persistentStoreEntries) {
746 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
749 auto* newObj =
static_cast<Mergeable*
>(entry->object);
750 newObj->
merge(oldObj);
754 entry->ptr = entry->object;
// No object delivered: report it, restore the entry and expose a null pointer.
756 entryNotFound(
"Persistent tree", entry->name);
757 entry->recoverFromNullObject();
758 entry->ptr =
nullptr;
765 if ((metaData.
getSite().find(
"bfe0") == 0) && (metaData.
getDate().compare(
"2019-06-30") < 0) &&