10 #include <framework/modules/rootio/RootInputModule.h> 
   12 #include <framework/io/RootIOUtilities.h> 
   13 #include <framework/io/RootFileInfo.h> 
   14 #include <framework/core/FileCatalog.h> 
   15 #include <framework/core/InputController.h> 
   16 #include <framework/pcore/Mergeable.h> 
   17 #include <framework/datastore/StoreObjPtr.h> 
   18 #include <framework/datastore/DataStore.h> 
   19 #include <framework/datastore/DependencyMap.h> 
   20 #include <framework/dataobjects/EventMetaData.h> 
   21 #include <framework/utilities/NumberSequence.h> 
   22 #include <framework/utilities/ScopeGuard.h> 
   23 #include <framework/database/Configuration.h> 
   25 #include <TClonesArray.h> 
   26 #include <TEventList.h> 
   27 #include <TObjArray.h> 
   28 #include <TChainElement.h> 
   35 using namespace RootIOUtilities;
 
   39 RootInputModule::RootInputModule() : 
Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
 
   42   setDescription(
"Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
 
   46   vector<string> emptyvector;
 
   48            "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
 
   51            "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
 
   54            "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName." 
   55            "Must be specified exactly once for each file to be opened." 
   56            "The first event has the entry number 0.", emptyvector);
 
   58            "Ignore override of file name via command line argument -i.", 
false);
 
   62            "the specified (experiment, run, event number) occurs. This parameter " 
   63            "is useful for debugging to start with a specific event.", 
m_skipToEvent);
 
   66            "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
 
   68            "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
 
   71            "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
 
   72   vector<string> excludePersistent({
"ProcessStatistics"});
 
   74            "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
 
   77            "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
 
   81            "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
 
   84            "file cache size in Mbytes. If negative, use root default", 0);
 
   89            "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined " 
   90            "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags " 
   99   if (skipNEventsOverride != 0)
 
  103   if (entrySequencesOverride.size() > 0)
 
  111   if (inputFiles.empty()) {
 
  112     B2FATAL(
"You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
 
  115     B2FATAL(
"Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
 
  119     B2FATAL(
"No valid files specified!");
 
  123     B2FATAL(
"Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = " 
  145   std::set<std::string> requiredEventBranches;
 
  146   std::set<std::string> requiredPersistentBranches;
 
  148   std::vector<FileMetaData> fileMetaData;
 
  157     bool validInputMCEvents{
true};
 
  164           B2WARNING(
"File appears to be empty, skipping" << 
LogVar(
"filename", fileName));
 
  168         fileMetaData.push_back(meta);
 
  170         if (fileMetaData.front().isMC() != meta.
isMC()) {
 
  171           throw std::runtime_error(
"Mixing real data and simulated data for input files is not supported");
 
  174         if (validInputMCEvents) {
 
  176           if ((sumInputMCEvents > 0 and meta.
getMcEvents() == 0)) {
 
  177             B2WARNING(
"inconsistent input files: zero mcEvents, setting total number of MC events to zero" << 
LogVar(
"filename", fileName));
 
  178             validInputMCEvents = 
false;
 
  181           if (__builtin_add_overflow(sumInputMCEvents, meta.
getMcEvents(), &sumInputMCEvents)) {
 
  182             B2FATAL(
"Number of MC events is too large and cannot be represented anymore");
 
  186         if (requiredEventBranches.empty()) {
 
  199           requiredEventBranches.emplace(
"EventMetaData");
 
  213           throw std::runtime_error(
"Could not add file to TChain");
 
  215         B2INFO(
"Added file " + fileName);
 
  216       } 
catch (std::exception& e) {
 
  217         B2FATAL(
"Could not open input file " << std::quoted(fileName) << 
": " << e.what());
 
  222   if (
m_tree->GetNtrees() == 0) B2FATAL(
"No file could be opened, aborting");
 
  232     std::set<std::string> unique_filenames;
 
  236     TObjArray* fileElements = 
m_tree->GetListOfFiles();
 
  237     TIter next(fileElements);
 
  238     TChainElement* chEl = 
nullptr;
 
  239     while ((chEl = (TChainElement*)next())) {
 
  240       if (!unique_filenames.insert(chEl->GetTitle()).second) {
 
  241         B2WARNING(
"The input file '" << chEl->GetTitle() << 
"' was specified more than once");
 
  247       B2FATAL(
"You specified a file multiple times, and specified a sequence of entries which should be used for each file. " 
  248               "Please specify each file only once if you're using the sequence feature!");
 
  253     auto* elist = 
new TEventList(
"input_event_list");
 
  255       int64_t offset = 
m_tree->GetTreeOffset()[iFile];
 
  256       int64_t next_offset = 
m_tree->GetTreeOffset()[iFile + 1];
 
  259         for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
 
  260           elist->Enter(global_entry);
 
  263           int64_t global_entry = entry + offset;
 
  264           if (global_entry >= next_offset) {
 
  265             B2WARNING(
"Given sequence contains entry numbers which are out of range. " 
  266                       "I won't add any further events to the EventList for the current file.");
 
  269             elist->Enter(global_entry);
 
  274     m_tree->SetEventList(elist);
 
  294     B2ERROR(
"parentLevel must be >= 0!");
 
  311       B2ERROR(
"skipToEvent must be a list of three values: experiment, run, event number");
 
  318       B2ERROR(
"You cannot supply a number of events to skip (skipNEvents) and an " 
  319               "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
 
  342       B2INFO(
"RootInput: will read entry " << nextEntry << 
" next.");
 
  349         const long chainentry = 
m_tree->GetChainEntryNumber(entry);
 
  350         B2INFO(
"RootInput: will read entry " << chainentry << 
" (entry " << entry << 
" in current file) next.");
 
  363     unsigned int errorFlag = 0;
 
  366       errorFlag = eventMetaData->getErrorFlag();
 
  367       if (errorFlag != 0) {
 
  369           B2WARNING(
"Discarding corrupted event" << 
LogVar(
"errorFlag", errorFlag) << 
LogVar(
"experiment", eventMetaData->getExperiment())
 
  370                     << 
LogVar(
"run", eventMetaData->getRun()) << 
LogVar(
"event", eventMetaData->getEvent()));
 
  373         eventMetaData->setEndOfData();
 
  376     if (errorFlag == 0) 
break;
 
  391     TFile* f = entry.second->GetCurrentFile();
 
  400     B2INFO(
"Statistics for event tree (parent files): " << parentReadStats.
getString());
 
  427     localEntryNumber = 
m_tree->GetEntryNumber(localEntryNumber);
 
  429   localEntryNumber = 
m_tree->LoadTree(localEntryNumber);
 
  431   if (localEntryNumber == -2) {
 
  434   } 
else if (localEntryNumber < 0) {
 
  435     B2FATAL(
"Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
 
  436             LogVar(
"error", localEntryNumber));
 
  442     entry->resetForGetEntry();
 
  445     for (
auto entry : storeEntries) {
 
  446       entry->resetForGetEntry();
 
  450   int bytesRead = 
m_tree->GetTree()->GetEntry(localEntryNumber);
 
  451   if (bytesRead <= 0) {
 
  452     B2FATAL(
"Could not read 'tree' entry " << 
m_nextEntry << 
" in file " << 
m_tree->GetCurrentFile()->GetName());
 
  463   const long treeNum = 
m_tree->GetTreeNumber();
 
  471     B2INFO(
"Loading new input file" 
  473            << 
LogVar(
"metadata LFN", fileMetaData->getLfn()));
 
  478     if (!entry->object) {
 
  479       entryNotFound(
"Event durability tree (global entry: " + std::to_string(
m_nextEntry) + 
")", entry->name, fileChanged);
 
  480       entry->recoverFromNullObject();
 
  481       entry->ptr = 
nullptr;
 
  483       entry->ptr = entry->object;
 
  489       B2FATAL(
"Could not read data from parent file!");
 
  495     if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
 
  498     for (
auto entry : storeEntries) {
 
  499       if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
 
  506   B2DEBUG(30, 
"File changed, loading persistent data.");
 
  509   const TObjArray* branchesObjArray = tree->GetListOfBranches();
 
  510   if (!branchesObjArray) {
 
  511     B2FATAL(
"Tree '" << tree->GetName() << 
"' doesn't contain any branches!");
 
  513   std::vector<TBranch*> branches;
 
  514   set<string> branchList;
 
  515   for (
int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
 
  516     auto* branch = 
static_cast<TBranch*
>(branchesObjArray->At(jj));
 
  517     if (!branch) 
continue;
 
  518     branchList.insert(branch->GetName());
 
  519     branches.emplace_back(branch);
 
  525   for (TBranch* branch : branches) {
 
  526     const std::string branchName = branch->GetName();
 
  531     if ((branchList.count(branchName) == 0) and
 
  532         ((branchName != 
"FileMetaData") || (tree != 
m_persistent)) and
 
  533         ((branchName != 
"EventMetaData") || (tree != 
m_tree))) {
 
  537     B2DEBUG(32, 
"Enabling branch" << 
LogVar(
"branchName", branchName)
 
  538             << 
LogVar(
"children found", found));
 
  541     TObject* objectPtr = 
nullptr;
 
  542     branch->SetAddress(&objectPtr);
 
  544     bool array = (string(branch->GetClassName()) == 
"TClonesArray");
 
  545     TClass* objClass = 
nullptr;
 
  547       objClass = (
static_cast<TClonesArray*
>(objectPtr))->GetClass();
 
  549       objClass = objectPtr->IsA();
 
  554       B2FATAL(
"Cannot connect branch to datastore" << 
LogVar(
"branchName", branchName));
 
  558     tree->SetBranchAddress(branch->GetName(), &(entry.
object));
 
  559     if (storeEntries) storeEntries->push_back(&entry);
 
  572   TBranch* branch = 
m_tree->GetBranch(
"EventMetaData");
 
  573   char* address = branch->GetAddress();
 
  575   branch->SetAddress(&eventMetaData);
 
  578   int run = eventMetaData->
getRun();
 
  579   unsigned int event = eventMetaData->
getEvent();
 
  581   branch->SetAddress(address);
 
  586     TDirectory* dir = gDirectory;
 
  588     TFile* file = TFile::Open(parentPfn.c_str(), 
"READ");
 
  590     if (!file || !file->IsOpen()) {
 
  591       B2ERROR(
"Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?" 
  592               << 
LogVar(
"LFN", parentLfn) << 
LogVar(
"PFN", parentPfn));
 
  616     tree->SetBranchAddress(
"EventMetaData", &metaData);
 
  618     tree->GetBranch(
"EventMetaData")->GetEntry(entry);
 
  629   int experiment = eventMetaData->getExperiment();
 
  630   int run = eventMetaData->getRun();
 
  631   unsigned int event = eventMetaData->getEvent();
 
  633   std::string parentLfn = eventMetaData->getParentLfn();
 
  638     TTree* tree = 
nullptr;
 
  640       TDirectory* dir = gDirectory;
 
  641       B2DEBUG(30, 
"Opening parent file" << 
LogVar(
"PFN", parentPfn));
 
  642       TFile* file = TFile::Open(parentPfn.c_str(), 
"READ");
 
  644       if (!file || !file->IsOpen()) {
 
  645         B2ERROR(
"Couldn't open parent file " << parentPfn);
 
  654         tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
 
  663     if (entryNumber < 0) {
 
  664       B2ERROR(
"No event " << experiment << 
"/" << run << 
"/" << 
event << 
" in parent file " << parentPfn);
 
  670     tree->SetBranchAddress(
"EventMetaData", &parentMetaData);
 
  671     tree->GetEntry(entryNumber);
 
  673       entry->ptr = entry->object;
 
  695   B2INFO(
"Index file detected, scanning to generate event list.");
 
  699   auto* elist = 
new TEventList(
"parent_entrylist");
 
  702   TBranch* branch = 
m_tree->GetBranch(
"EventMetaData");
 
  703   auto* address = branch->GetAddress();
 
  705   branch->SetAddress(&eventMetaData);
 
  706   long nEntries = 
m_tree->GetEntries();
 
  710     int run = eventMetaData->
getRun();
 
  711     unsigned int event = eventMetaData->
getEvent();
 
  712     const std::string& newParentLfn = eventMetaData->
getParentLfn();
 
  714     if (parentLfn != newParentLfn) {
 
  721   branch->SetAddress(address);
 
  724     tree->SetEventList(elist);
 
  732     B2FATAL(entryOrigin << 
" in " << 
m_tree->GetFile()->GetName() << 
" does not contain required object " << name << 
", aborting.");
 
  733   } 
else if (fileChanged) {
 
  734     B2WARNING(entryOrigin << 
" in " << 
m_tree->GetFile()->GetName() << 
" does not contain object " << name <<
 
  735               " that was present in a previous entry.");
 
  744     bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
 
  745     TObject* copyOfPreviousVersion = 
nullptr;
 
  747       copyOfPreviousVersion = entry->object->Clone();
 
  749     entry->resetForGetEntry();
 
  751     entry->ptr = copyOfPreviousVersion;
 
  755   if (bytesRead <= 0) {
 
  756     const char* name = 
m_tree->GetCurrentFile() ? 
m_tree->GetCurrentFile()->GetName() : 
"<unknown>";
 
  757     B2FATAL(
"Could not read 'persistent' TTree #" << fileEntry << 
" in file " << name);
 
  762       bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
 
  765         auto* newObj = 
static_cast<Mergeable*
>(entry->object);
 
  766         newObj->
merge(oldObj);
 
  770       entry->ptr = entry->object;
 
  773       entry->recoverFromNullObject();
 
  774       entry->ptr = 
nullptr;
 
  781   if ((metaData.
getSite().find(
"bfe0") == 0) && (metaData.
getDate().compare(
"2019-06-30") < 0) &&
 
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
EDurability
Durability types.
@ c_Persistent
Object is available during entire execution time.
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
static DataStore & Instance()
Instance of singleton Store.
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g. 23:42,101) defining the entries which are processed for each input file.
static Environment & Instance()
Static method to get a reference to the Environment instance.
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Abstract base class for objects that can be merged.
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_Input
This module is an input module (reads data).
Helper class to factorize some necessary tasks when working with Belle2 output files.
const FileMetaData & getFileMetaData()
Return the file metadata from the file.
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Type-safe access to single objects in the data store.
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
Return the entry number with the given (event, run, experiment) from the tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
Wraps a stored array/object, stored under unique (name, durability) key.
TObject * object
The pointer to the actual object.