10 #include <framework/modules/rootio/RootInputModule.h>
12 #include <framework/io/RootIOUtilities.h>
13 #include <framework/io/RootFileInfo.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/InputController.h>
16 #include <framework/pcore/Mergeable.h>
17 #include <framework/datastore/StoreObjPtr.h>
18 #include <framework/datastore/DataStore.h>
19 #include <framework/datastore/DependencyMap.h>
20 #include <framework/dataobjects/EventMetaData.h>
21 #include <framework/utilities/NumberSequence.h>
22 #include <framework/utilities/ScopeGuard.h>
23 #include <framework/database/Configuration.h>
25 #include <TClonesArray.h>
26 #include <TEventList.h>
27 #include <TObjArray.h>
28 #include <TChainElement.h>
35 using namespace RootIOUtilities;
39 RootInputModule::RootInputModule() :
Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
42 setDescription(
"Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
46 vector<string> emptyvector;
48 "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
51 "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
54 "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55 "Must be specified exactly once for each file to be opened."
56 "The first event has the entry number 0.", emptyvector);
58 "Ignore override of file name via command line argument -i.",
false);
62 "the specified (experiment, run, event number) occurs. This parameter "
63 "is useful for debugging to start with a specific event.",
m_skipToEvent);
66 "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
68 "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
71 "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
72 vector<string> excludePersistent({
"ProcessStatistics"});
74 "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
77 "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
81 "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
84 "file cache size in Mbytes. If negative, use root default", 0);
89 "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90 "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
99 if (skipNEventsOverride != 0)
103 if (entrySequencesOverride.size() > 0)
111 if (inputFiles.empty()) {
112 B2FATAL(
"You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
115 B2FATAL(
"Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
119 B2FATAL(
"No valid files specified!");
123 B2FATAL(
"Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
145 std::set<std::string> requiredEventBranches;
146 std::set<std::string> requiredPersistentBranches;
148 std::vector<FileMetaData> fileMetaData;
157 bool validInputMCEvents{
true};
164 B2WARNING(
"File appears to be empty, skipping" <<
LogVar(
"filename", fileName));
168 fileMetaData.push_back(meta);
170 if (fileMetaData.front().isMC() != meta.
isMC()) {
171 throw std::runtime_error(
"Mixing real data and simulated data for input files is not supported");
174 if (validInputMCEvents) {
176 if ((sumInputMCEvents > 0 and meta.
getMcEvents() == 0)) {
177 B2WARNING(
"inconsistent input files: zero mcEvents, setting total number of MC events to zero" <<
LogVar(
"filename", fileName));
178 validInputMCEvents =
false;
181 if (__builtin_add_overflow(sumInputMCEvents, meta.
getMcEvents(), &sumInputMCEvents)) {
182 B2FATAL(
"Number of MC events is too large and cannot be represented anymore");
186 if (requiredEventBranches.empty()) {
199 requiredEventBranches.emplace(
"EventMetaData");
213 throw std::runtime_error(
"Could not add file to TChain");
215 B2INFO(
"Added file " + fileName);
216 }
catch (std::exception& e) {
217 B2FATAL(
"Could not open input file " << std::quoted(fileName) <<
": " << e.what());
222 if (
m_tree->GetNtrees() == 0) B2FATAL(
"No file could be opened, aborting");
232 std::set<std::string> unique_filenames;
236 TObjArray* fileElements =
m_tree->GetListOfFiles();
237 TIter next(fileElements);
238 TChainElement* chEl =
nullptr;
239 while ((chEl = (TChainElement*)next())) {
240 if (!unique_filenames.insert(chEl->GetTitle()).second) {
241 B2WARNING(
"The input file '" << chEl->GetTitle() <<
"' was specified more than once");
247 B2FATAL(
"You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
248 "Please specify each file only once if you're using the sequence feature!");
253 auto* elist =
new TEventList(
"input_event_list");
255 int64_t offset =
m_tree->GetTreeOffset()[iFile];
256 int64_t next_offset =
m_tree->GetTreeOffset()[iFile + 1];
259 for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
260 elist->Enter(global_entry);
263 int64_t global_entry = entry + offset;
264 if (global_entry >= next_offset) {
265 B2WARNING(
"Given sequence contains entry numbers which are out of range. "
266 "I won't add any further events to the EventList for the current file.");
269 elist->Enter(global_entry);
274 m_tree->SetEventList(elist);
294 B2ERROR(
"parentLevel must be >= 0!");
311 B2ERROR(
"skipToEvent must be a list of three values: experiment, run, event number");
318 B2ERROR(
"You cannot supply a number of events to skip (skipNEvents) and an "
319 "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
342 B2INFO(
"RootInput: will read entry " << nextEntry <<
" next.");
349 const long chainentry =
m_tree->GetChainEntryNumber(entry);
350 B2INFO(
"RootInput: will read entry " << chainentry <<
" (entry " << entry <<
" in current file) next.");
363 unsigned int errorFlag = 0;
366 errorFlag = eventMetaData->getErrorFlag();
367 if (errorFlag != 0) {
369 B2WARNING(
"Discarding corrupted event" <<
LogVar(
"errorFlag", errorFlag) <<
LogVar(
"experiment", eventMetaData->getExperiment())
370 <<
LogVar(
"run", eventMetaData->getRun()) <<
LogVar(
"event", eventMetaData->getEvent()));
373 eventMetaData->setEndOfData();
376 if (errorFlag == 0)
break;
391 TFile* f = entry.second->GetCurrentFile();
400 B2INFO(
"Statistics for event tree (parent files): " << parentReadStats.
getString());
427 localEntryNumber =
m_tree->GetEntryNumber(localEntryNumber);
429 localEntryNumber =
m_tree->LoadTree(localEntryNumber);
431 if (localEntryNumber == -2) {
434 }
else if (localEntryNumber < 0) {
435 B2FATAL(
"Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
436 LogVar(
"error", localEntryNumber));
442 entry->resetForGetEntry();
445 for (
auto entry : storeEntries) {
446 entry->resetForGetEntry();
450 int bytesRead =
m_tree->GetTree()->GetEntry(localEntryNumber);
451 if (bytesRead <= 0) {
452 B2FATAL(
"Could not read 'tree' entry " <<
m_nextEntry <<
" in file " <<
m_tree->GetCurrentFile()->GetName());
463 const long treeNum =
m_tree->GetTreeNumber();
471 B2INFO(
"Loading new input file"
473 <<
LogVar(
"metadata LFN", fileMetaData->getLfn()));
478 if (!entry->object) {
479 entryNotFound(
"Event durability tree (global entry: " + std::to_string(
m_nextEntry) +
")", entry->name, fileChanged);
480 entry->recoverFromNullObject();
481 entry->ptr =
nullptr;
483 entry->ptr = entry->object;
489 B2FATAL(
"Could not read data from parent file!");
495 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
498 for (
auto entry : storeEntries) {
499 if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
506 B2DEBUG(30,
"File changed, loading persistent data.");
509 const TObjArray* branchesObjArray = tree->GetListOfBranches();
510 if (!branchesObjArray) {
511 B2FATAL(
"Tree '" << tree->GetName() <<
"' doesn't contain any branches!");
513 std::vector<TBranch*> branches;
514 set<string> branchList;
515 for (
int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
516 auto* branch =
static_cast<TBranch*
>(branchesObjArray->At(jj));
517 if (!branch)
continue;
518 branchList.insert(branch->GetName());
519 branches.emplace_back(branch);
525 for (TBranch* branch : branches) {
526 const std::string branchName = branch->GetName();
531 if ((branchList.count(branchName) == 0) and
532 ((branchName !=
"FileMetaData") || (tree !=
m_persistent)) and
533 ((branchName !=
"EventMetaData") || (tree !=
m_tree))) {
537 B2DEBUG(32,
"Enabling branch" <<
LogVar(
"branchName", branchName)
538 <<
LogVar(
"children found", found));
541 TObject* objectPtr =
nullptr;
542 branch->SetAddress(&objectPtr);
544 bool array = (string(branch->GetClassName()) ==
"TClonesArray");
545 TClass* objClass =
nullptr;
547 objClass = (
static_cast<TClonesArray*
>(objectPtr))->GetClass();
549 objClass = objectPtr->IsA();
554 B2FATAL(
"Cannot connect branch to datastore" <<
LogVar(
"branchName", branchName));
558 tree->SetBranchAddress(branch->GetName(), &(entry.
object));
559 if (storeEntries) storeEntries->push_back(&entry);
572 TBranch* branch =
m_tree->GetBranch(
"EventMetaData");
573 char* address = branch->GetAddress();
575 branch->SetAddress(&eventMetaData);
578 int run = eventMetaData->
getRun();
579 unsigned int event = eventMetaData->
getEvent();
581 branch->SetAddress(address);
586 TDirectory* dir = gDirectory;
588 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
590 if (!file || !file->IsOpen()) {
591 B2ERROR(
"Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
592 <<
LogVar(
"LFN", parentLfn) <<
LogVar(
"PFN", parentPfn));
616 tree->SetBranchAddress(
"EventMetaData", &metaData);
618 tree->GetBranch(
"EventMetaData")->GetEntry(entry);
629 int experiment = eventMetaData->getExperiment();
630 int run = eventMetaData->getRun();
631 unsigned int event = eventMetaData->getEvent();
633 std::string parentLfn = eventMetaData->getParentLfn();
638 TTree* tree =
nullptr;
640 TDirectory* dir = gDirectory;
641 B2DEBUG(30,
"Opening parent file" <<
LogVar(
"PFN", parentPfn));
642 TFile* file = TFile::Open(parentPfn.c_str(),
"READ");
644 if (!file || !file->IsOpen()) {
645 B2ERROR(
"Couldn't open parent file " << parentPfn);
654 tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
663 if (entryNumber < 0) {
664 B2ERROR(
"No event " << experiment <<
"/" << run <<
"/" <<
event <<
" in parent file " << parentPfn);
670 tree->SetBranchAddress(
"EventMetaData", &parentMetaData);
671 tree->GetEntry(entryNumber);
673 entry->ptr = entry->object;
695 B2INFO(
"Index file detected, scanning to generate event list.");
699 auto* elist =
new TEventList(
"parent_entrylist");
702 TBranch* branch =
m_tree->GetBranch(
"EventMetaData");
703 auto* address = branch->GetAddress();
705 branch->SetAddress(&eventMetaData);
706 long nEntries =
m_tree->GetEntries();
710 int run = eventMetaData->
getRun();
711 unsigned int event = eventMetaData->
getEvent();
712 const std::string& newParentLfn = eventMetaData->
getParentLfn();
714 if (parentLfn != newParentLfn) {
721 branch->SetAddress(address);
724 tree->SetEventList(elist);
732 B2FATAL(entryOrigin <<
" in " <<
m_tree->GetFile()->GetName() <<
" does not contain required object " << name <<
", aborting.");
733 }
else if (fileChanged) {
734 B2WARNING(entryOrigin <<
" in " <<
m_tree->GetFile()->GetName() <<
" does not contain object " << name <<
735 " that was present in a previous entry.");
744 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
745 TObject* copyOfPreviousVersion =
nullptr;
747 copyOfPreviousVersion = entry->object->Clone();
749 entry->resetForGetEntry();
751 entry->ptr = copyOfPreviousVersion;
755 if (bytesRead <= 0) {
756 const char* name =
m_tree->GetCurrentFile() ?
m_tree->GetCurrentFile()->GetName() :
"<unknown>";
757 B2FATAL(
"Could not read 'persistent' TTree #" << fileEntry <<
" in file " << name);
762 bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
765 auto* newObj =
static_cast<Mergeable*
>(entry->object);
766 newObj->
merge(oldObj);
770 entry->ptr = entry->object;
773 entry->recoverFromNullObject();
774 entry->ptr =
nullptr;
781 if ((metaData.
getSite().find(
"bfe0") == 0) && (metaData.
getDate().compare(
"2019-06-30") < 0) &&
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
EDurability
Durability types.
@ c_Persistent
Object is available during entire execution time.
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
static DataStore & Instance()
Instance of singleton Store.
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g. 23:42,101) defining the entries which are processed for each input file.
static Environment & Instance()
Static method to get a reference to the Environment instance.
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Abstract base class for objects that can be merged.
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_Input
This module is an input module (reads data).
Helper class to factorize some necessary tasks when working with Belle2 output files.
const FileMetaData & getFileMetaData()
Return the file metadata from the file.
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contains at least all the branches in the set of required branches.
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Type-safe access to single objects in the data store.
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
return entry number with given (event, run, experiment) from tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
Wraps a stored array/object, stored under unique (name, durability) key.
TObject * object
The pointer to the actual object.