10 #include <framework/modules/rootio/RootInputModule.h>
12 #include <framework/io/RootIOUtilities.h>
13 #include <framework/io/RootFileInfo.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/InputController.h>
16 #include <framework/pcore/Mergeable.h>
17 #include <framework/datastore/StoreObjPtr.h>
18 #include <framework/datastore/DataStore.h>
19 #include <framework/datastore/DependencyMap.h>
20 #include <framework/dataobjects/EventMetaData.h>
21 #include <framework/utilities/NumberSequence.h>
22 #include <framework/utilities/ScopeGuard.h>
23 #include <framework/database/Configuration.h>
25 #include <TClonesArray.h>
26 #include <TEventList.h>
27 #include <TObjArray.h>
28 #include <TChainElement.h>
35 using namespace RootIOUtilities;
39 RootInputModule::RootInputModule() :
Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
42 setDescription(
"Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
46 vector<string> emptyvector;
48 "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
51 "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
54 "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55 "Must be specified exactly once for each file to be opened."
56 "The first event has the entry number 0.", emptyvector);
58 "Ignore override of file name via command line argument -i.",
false);
62 "the specified (experiment, run, event number) occurs. This parameter "
63 "is useful for debugging to start with a specific event.",
m_skipToEvent);
66 "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
68 "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
71 "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
72 vector<string> excludePersistent({
"ProcessStatistics"});
74 "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
77 "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
81 "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
84 "file cache size in Mbytes. If negative, use root default", 0);
89 "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90 "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
94 "When using a second RootInputModule in an independent path [usually if you are using add_independent_merge_path(...)] "
95 "this has to be set to true",
104 if (skipNEventsOverride != 0)
108 if (entrySequencesOverride.size() > 0)
116 if (inputFiles.empty()) {
117 B2FATAL(
"You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
120 B2FATAL(
"Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
124 B2FATAL(
"No valid files specified!");
128 B2FATAL(
"Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
150 std::set<std::string> requiredEventBranches;
151 std::set<std::string> requiredPersistentBranches;
153 std::vector<FileMetaData> fileMetaData;
162 bool validInputMCEvents{
true};
168 if (meta.getNEvents() == 0) {
169 B2WARNING(
"File appears to be empty, skipping" <<
LogVar(
"filename", fileName));
173 fileMetaData.push_back(meta);
175 if (fileMetaData.front().isMC() != meta.isMC()) {
176 throw std::runtime_error(
"Mixing real data and simulated data for input files is not supported");
179 if (validInputMCEvents) {
181 if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
182 B2WARNING(
"inconsistent input files: zero mcEvents, setting total number of MC events to zero" <<
LogVar(
"filename", fileName));
183 validInputMCEvents =
false;
186 if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
187 B2FATAL(
"Number of MC events is too large and cannot be represented anymore");
191 if (requiredEventBranches.empty()) {
204 requiredEventBranches.emplace(
"EventMetaData");
217 if (
m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 ||
m_persistent->AddFile(fileName.c_str(), 1) == 0) {
218 throw std::runtime_error(
"Could not add file to TChain");
220 B2INFO(
"Added file " + fileName);
221 }
catch (std::exception& e) {
222 B2FATAL(
"Could not open input file " << std::quoted(fileName) <<
": " << e.what());
227 if (
m_tree->GetNtrees() == 0) B2FATAL(
"No file could be opened, aborting");
237 std::set<std::string> unique_filenames;
241 TObjArray* fileElements =
m_tree->GetListOfFiles();
242 TIter next(fileElements);
243 TChainElement* chEl =
nullptr;
244 while ((chEl = (TChainElement*)next())) {
245 if (!unique_filenames.insert(chEl->GetTitle()).second) {
246 B2WARNING(
"The input file '" << chEl->GetTitle() <<
"' was specified more than once");
252 B2FATAL(
"You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
253 "Please specify each file only once if you're using the sequence feature!");
258 auto* elist =
new TEventList(
"input_event_list");
260 int64_t offset =
m_tree->GetTreeOffset()[iFile];
261 int64_t next_offset =
m_tree->GetTreeOffset()[iFile + 1];
264 for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
265 elist->Enter(global_entry);
268 int64_t global_entry = entry + offset;
269 if (global_entry >= next_offset) {
270 B2WARNING(
"Given sequence contains entry numbers which are out of range. "
271 "I won't add any further events to the EventList for the current file.");
274 elist->Enter(global_entry);
279 m_tree->SetEventList(elist);
299 B2ERROR(
"parentLevel must be >= 0!");
316 B2ERROR(
"skipToEvent must be a list of three values: experiment, run, event number");
323 B2ERROR(
"You cannot supply a number of events to skip (skipNEvents) and an "
324 "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
355 B2INFO(
"RootInput: will read entry " << nextEntry <<
" next.");
363 const long chainentry =
m_tree->GetChainEntryNumber(entry);
364 B2INFO(
"RootInput: will read entry " << chainentry <<
" (entry " << entry <<
" in current file) next.");
377 unsigned int errorFlag = 0;
380 errorFlag = eventMetaData->getErrorFlag();
381 if (errorFlag != 0) {
383 B2WARNING(
"Discarding corrupted event" <<
LogVar(
"errorFlag", errorFlag) <<
LogVar(
"experiment", eventMetaData->getExperiment())
384 <<
LogVar(
"run", eventMetaData->getRun()) <<
LogVar(
"event", eventMetaData->getEvent()));
387 eventMetaData->setEndOfData();
390 if (errorFlag == 0)
break;
405 TFile* f = entry.second->GetCurrentFile();
414 B2INFO(
"Statistics for event tree (parent files): " << parentReadStats.
getString());
441 localEntryNumber =
m_tree->GetEntryNumber(localEntryNumber);
443 localEntryNumber =
m_tree->LoadTree(localEntryNumber);
445 if (localEntryNumber == -2) {
448 }
else if (localEntryNumber < 0) {
449 B2FATAL(
"Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
450 LogVar(
"error", localEntryNumber));
456 entry->resetForGetEntry();
459 for (
auto entry : storeEntries) {
460 entry->resetForGetEntry();
464 int bytesRead =
m_tree->GetTree()->GetEntry(localEntryNumber);
465 if (bytesRead <= 0) {
466 B2FATAL(
"Could not read 'tree' entry " <<
m_nextEntry <<
" in file " <<
m_tree->GetCurrentFile()->GetName());
477 const long treeNum =
m_tree->GetTreeNumber();
485 B2INFO(
"Loading new input file"
487 <<
LogVar(
"metadata LFN", fileMetaData->getLfn()));
492 if (!entry->object) {
493 entryNotFound(
"Event durability tree (global entry: " + std::to_string(
m_nextEntry) +
")", entry->name, fileChanged);
494 entry->recoverFromNullObject();
495 entry->ptr =
nullptr;
497 entry->ptr = entry->object;
503 B2FATAL(
"Could not read data from parent file!");
509 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
512 for (
auto entry : storeEntries) {
513 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
520 B2DEBUG(30,
"File changed, loading persistent data.");
523 const TObjArray* branchesObjArray = tree->GetListOfBranches();
524 if (!branchesObjArray) {
525 B2FATAL(
"Tree '" << tree->GetName() <<
"' doesn't contain any branches!");
527 std::vector<TBranch*> branches;
528 set<string> branchList;
529 for (
int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
530 auto* branch =
static_cast<TBranch*
>(branchesObjArray->At(jj));
531 if (!branch)
continue;
532 branchList.insert(branch->GetName());
533 branches.emplace_back(branch);
539 for (TBranch* branch : branches) {
540 const std::string branchName = branch->GetName();
545 if ((branchList.count(branchName) == 0) and
546 ((branchName !=
"FileMetaData") || (tree !=
m_persistent)) and
547 ((branchName !=
"EventMetaData") || (tree !=
m_tree))) {
551 B2DEBUG(32,
"Enabling branch" <<
LogVar(
"branchName", branchName)
552 <<
LogVar(
"children found", found));
555 TObject* objectPtr =
nullptr;
556 branch->SetAddress(&objectPtr);
558 bool array = (string(branch->GetClassName()) ==
"TClonesArray");
559 TClass* objClass =
nullptr;
561 objClass = (
static_cast<TClonesArray*
>(objectPtr))->GetClass();
563 objClass = objectPtr->IsA();
568 B2FATAL(
"Cannot connect branch to datastore" <<
LogVar(
"branchName", branchName));
572 tree->SetBranchAddress(branch->GetName(), &(entry.
object));
573 if (storeEntries) storeEntries->push_back(&entry);
587 TBranch* branch =
m_tree->GetBranch(
"EventMetaData");
588 char* address = branch->GetAddress();
590 branch->SetAddress(&eventMetaData);
593 int run = eventMetaData->
getRun();
594 unsigned int event = eventMetaData->
getEvent();
596 branch->SetAddress(address);
601 TDirectory* dir = gDirectory;
603 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
605 if (!file || !file->IsOpen()) {
606 B2ERROR(
"Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
607 <<
LogVar(
"LFN", parentLfn) <<
LogVar(
"PFN", parentPfn));
631 tree->SetBranchAddress(
"EventMetaData", &metaData);
633 tree->GetBranch(
"EventMetaData")->GetEntry(entry);
644 int experiment = eventMetaData->getExperiment();
645 int run = eventMetaData->getRun();
646 unsigned int event = eventMetaData->getEvent();
648 std::string parentLfn = eventMetaData->getParentLfn();
653 TTree* tree =
nullptr;
655 TDirectory* dir = gDirectory;
656 B2DEBUG(30,
"Opening parent file" <<
LogVar(
"PFN", parentPfn));
657 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
659 if (!file || !file->IsOpen()) {
660 B2ERROR(
"Couldn't open parent file " << parentPfn);
669 tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
678 if (entryNumber < 0) {
679 B2ERROR(
"No event " << experiment <<
"/" << run <<
"/" <<
event <<
" in parent file " << parentPfn);
685 tree->SetBranchAddress(
"EventMetaData", &parentMetaData);
686 tree->GetEntry(entryNumber);
688 entry->ptr = entry->object;
710 B2INFO(
"Index file detected, scanning to generate event list.");
714 auto* elist =
new TEventList(
"parent_entrylist");
717 TBranch* branch =
m_tree->GetBranch(
"EventMetaData");
718 auto* address = branch->GetAddress();
720 branch->SetAddress(&eventMetaData);
721 long nEntries =
m_tree->GetEntries();
725 int run = eventMetaData->
getRun();
726 unsigned int event = eventMetaData->
getEvent();
727 const std::string& newParentLfn = eventMetaData->
getParentLfn();
729 if (parentLfn != newParentLfn) {
736 branch->SetAddress(address);
739 tree->SetEventList(elist);
747 B2FATAL(entryOrigin <<
" in " <<
m_tree->GetFile()->GetName() <<
" does not contain required object " << name <<
", aborting.");
748 }
else if (fileChanged) {
749 B2WARNING(entryOrigin <<
" in " <<
m_tree->GetFile()->GetName() <<
" does not contain object " << name <<
750 " that was present in a previous entry.");
759 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
760 TObject* copyOfPreviousVersion =
nullptr;
762 copyOfPreviousVersion = entry->object->Clone();
764 entry->resetForGetEntry();
766 entry->ptr = copyOfPreviousVersion;
770 if (bytesRead <= 0) {
771 const char* name =
m_tree->GetCurrentFile() ?
m_tree->GetCurrentFile()->GetName() :
"<unknown>";
772 B2FATAL(
"Could not read 'persistent' TTree #" << fileEntry <<
" in file " << name);
777 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
780 auto* newObj =
static_cast<Mergeable*
>(entry->object);
781 newObj->
merge(oldObj);
785 entry->ptr = entry->object;
788 entry->recoverFromNullObject();
789 entry->ptr =
nullptr;
796 if ((metaData.
getSite().find(
"bfe0") == 0) && (metaData.
getDate().compare(
"2019-06-30") < 0) &&
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
EDurability
Durability types.
@ c_Persistent
Object is available during entire execution time.
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
static DataStore & Instance()
Instance of singleton Store.
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g.
static Environment & Instance()
Static method to get a reference to the Environment instance.
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Abstract base class for objects that can be merged.
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_Input
This module is an input module (reads data).
Helper class to factorize some necessary tasks when working with Belle2 output files.
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Type-safe access to single objects in the data store.
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
return entry number with given (event, run, experiment) from tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
Wraps a stored array/object, stored under unique (name, durability) key.
TObject * object
The pointer to the actual object.