10 #include <framework/modules/rootio/RootInputModule.h> 
   12 #include <framework/io/RootIOUtilities.h> 
   13 #include <framework/io/RootFileInfo.h> 
   14 #include <framework/core/FileCatalog.h> 
   15 #include <framework/core/InputController.h> 
   16 #include <framework/pcore/Mergeable.h> 
   17 #include <framework/datastore/StoreObjPtr.h> 
   18 #include <framework/datastore/DataStore.h> 
   19 #include <framework/datastore/DependencyMap.h> 
   20 #include <framework/dataobjects/EventMetaData.h> 
   21 #include <framework/utilities/NumberSequence.h> 
   22 #include <framework/utilities/ScopeGuard.h> 
   23 #include <framework/database/Configuration.h> 
   25 #include <TClonesArray.h> 
   26 #include <TEventList.h> 
   27 #include <TObjArray.h> 
   28 #include <TChainElement.h> 
   35 using namespace RootIOUtilities;
 
   39 RootInputModule::RootInputModule() : 
Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
 
   42   setDescription(
"Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
 
   46   vector<string> emptyvector;
 
   48            "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
 
   51            "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
 
   54            "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName." 
   55            "Must be specified exactly once for each file to be opened." 
   56            "The first event has the entry number 0.", emptyvector);
 
   58            "Ignore override of file name via command line argument -i.", 
false);
 
   62            "the specified (experiment, run, event number) occurs. This parameter " 
   63            "is useful for debugging to start with a specific event.", 
m_skipToEvent);
 
   66            "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
 
   68            "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
 
   71            "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
 
   72   vector<string> excludePersistent({
"ProcessStatistics"});
 
   74            "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
 
   77            "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
 
   81            "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
 
   84            "file cache size in Mbytes. If negative, use root default", 0);
 
   89            "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined " 
   90            "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags " 
   94            "When using a second RootInputModule in an independent path [usually if you are using add_independent_merge_path(...)] " 
   95            "this has to be set to true",
 
  104   if (skipNEventsOverride != 0)
 
  108   if (entrySequencesOverride.size() > 0)
 
  116   if (inputFiles.empty()) {
 
  117     B2FATAL(
"You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
 
  120     B2FATAL(
"Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
 
  124     B2FATAL(
"No valid files specified!");
 
  128     B2FATAL(
"Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = " 
  150   std::set<std::string> requiredEventBranches;
 
  151   std::set<std::string> requiredPersistentBranches;
 
  153   std::vector<FileMetaData> fileMetaData;
 
  162     bool validInputMCEvents{
true};
 
  169           B2WARNING(
"File appears to be empty, skipping" << 
LogVar(
"filename", fileName));
 
  173         fileMetaData.push_back(meta);
 
  175         if (fileMetaData.front().isMC() != meta.
isMC()) {
 
  176           throw std::runtime_error(
"Mixing real data and simulated data for input files is not supported");
 
  179         if (validInputMCEvents) {
 
  181           if ((sumInputMCEvents > 0 and meta.
getMcEvents() == 0)) {
 
  182             B2WARNING(
"inconsistent input files: zero mcEvents, setting total number of MC events to zero" << 
LogVar(
"filename", fileName));
 
  183             validInputMCEvents = 
false;
 
  186           if (__builtin_add_overflow(sumInputMCEvents, meta.
getMcEvents(), &sumInputMCEvents)) {
 
  187             B2FATAL(
"Number of MC events is too large and cannot be represented anymore");
 
  191         if (requiredEventBranches.empty()) {
 
  204           requiredEventBranches.emplace(
"EventMetaData");
 
  218           throw std::runtime_error(
"Could not add file to TChain");
 
  220         B2INFO(
"Added file " + fileName);
 
  221       } 
catch (std::exception& e) {
 
  222         B2FATAL(
"Could not open input file " << std::quoted(fileName) << 
": " << e.what());
 
  227   if (
m_tree->GetNtrees() == 0) B2FATAL(
"No file could be opened, aborting");
 
  237     std::set<std::string> unique_filenames;
 
  241     TObjArray* fileElements = 
m_tree->GetListOfFiles();
 
  242     TIter next(fileElements);
 
  243     TChainElement* chEl = 
nullptr;
 
  244     while ((chEl = (TChainElement*)next())) {
 
  245       if (!unique_filenames.insert(chEl->GetTitle()).second) {
 
  246         B2WARNING(
"The input file '" << chEl->GetTitle() << 
"' was specified more than once");
 
  252       B2FATAL(
"You specified a file multiple times, and specified a sequence of entries which should be used for each file. " 
  253               "Please specify each file only once if you're using the sequence feature!");
 
  258     auto* elist = 
new TEventList(
"input_event_list");
 
  260       int64_t offset = 
m_tree->GetTreeOffset()[iFile];
 
  261       int64_t next_offset = 
m_tree->GetTreeOffset()[iFile + 1];
 
  264         for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
 
  265           elist->Enter(global_entry);
 
  268           int64_t global_entry = entry + offset;
 
  269           if (global_entry >= next_offset) {
 
  270             B2WARNING(
"Given sequence contains entry numbers which are out of range. " 
  271                       "I won't add any further events to the EventList for the current file.");
 
  274             elist->Enter(global_entry);
 
  279     m_tree->SetEventList(elist);
 
  299     B2ERROR(
"parentLevel must be >= 0!");
 
  316       B2ERROR(
"skipToEvent must be a list of three values: experiment, run, event number");
 
  323       B2ERROR(
"You cannot supply a number of events to skip (skipNEvents) and an " 
  324               "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
 
  355         B2INFO(
"RootInput: will read entry " << nextEntry << 
" next.");
 
  363         const long chainentry = 
m_tree->GetChainEntryNumber(entry);
 
  364         B2INFO(
"RootInput: will read entry " << chainentry << 
" (entry " << entry << 
" in current file) next.");
 
  377     unsigned int errorFlag = 0;
 
  380       errorFlag = eventMetaData->getErrorFlag();
 
  381       if (errorFlag != 0) {
 
  383           B2WARNING(
"Discarding corrupted event" << 
LogVar(
"errorFlag", errorFlag) << 
LogVar(
"experiment", eventMetaData->getExperiment())
 
  384                     << 
LogVar(
"run", eventMetaData->getRun()) << 
LogVar(
"event", eventMetaData->getEvent()));
 
  387         eventMetaData->setEndOfData();
 
  390     if (errorFlag == 0) 
break;
 
  405     TFile* f = entry.second->GetCurrentFile();
 
  414     B2INFO(
"Statistics for event tree (parent files): " << parentReadStats.
getString());
 
  441     localEntryNumber = 
m_tree->GetEntryNumber(localEntryNumber);
 
  443   localEntryNumber = 
m_tree->LoadTree(localEntryNumber);
 
  445   if (localEntryNumber == -2) {
 
  448   } 
else if (localEntryNumber < 0) {
 
  449     B2FATAL(
"Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
 
  450             LogVar(
"error", localEntryNumber));
 
  456     entry->resetForGetEntry();
 
  459     for (
auto entry : storeEntries) {
 
  460       entry->resetForGetEntry();
 
  464   int bytesRead = 
m_tree->GetTree()->GetEntry(localEntryNumber);
 
  465   if (bytesRead <= 0) {
 
  466     B2FATAL(
"Could not read 'tree' entry " << 
m_nextEntry << 
" in file " << 
m_tree->GetCurrentFile()->GetName());
 
  477   const long treeNum = 
m_tree->GetTreeNumber();
 
  485     B2INFO(
"Loading new input file" 
  487            << 
LogVar(
"metadata LFN", fileMetaData->getLfn()));
 
  492     if (!entry->object) {
 
  493       entryNotFound(
"Event durability tree (global entry: " + std::to_string(
m_nextEntry) + 
")", entry->name, fileChanged);
 
  494       entry->recoverFromNullObject();
 
  495       entry->ptr = 
nullptr;
 
  497       entry->ptr = entry->object;
 
  503       B2FATAL(
"Could not read data from parent file!");
 
  509     if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
 
  512     for (
auto entry : storeEntries) {
 
  513       if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
 
  520   B2DEBUG(30, 
"File changed, loading persistent data.");
 
  523   const TObjArray* branchesObjArray = tree->GetListOfBranches();
 
  524   if (!branchesObjArray) {
 
  525     B2FATAL(
"Tree '" << tree->GetName() << 
"' doesn't contain any branches!");
 
  527   std::vector<TBranch*> branches;
 
  528   set<string> branchList;
 
  529   for (
int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
 
  530     auto* branch = 
static_cast<TBranch*
>(branchesObjArray->At(jj));
 
  531     if (!branch) 
continue;
 
  532     branchList.insert(branch->GetName());
 
  533     branches.emplace_back(branch);
 
  539   for (TBranch* branch : branches) {
 
  540     const std::string branchName = branch->GetName();
 
  545     if ((branchList.count(branchName) == 0) and
 
  546         ((branchName != 
"FileMetaData") || (tree != 
m_persistent)) and
 
  547         ((branchName != 
"EventMetaData") || (tree != 
m_tree))) {
 
  551     B2DEBUG(32, 
"Enabling branch" << 
LogVar(
"branchName", branchName)
 
  552             << 
LogVar(
"children found", found));
 
  555     TObject* objectPtr = 
nullptr;
 
  556     branch->SetAddress(&objectPtr);
 
  558     bool array = (string(branch->GetClassName()) == 
"TClonesArray");
 
  559     TClass* objClass = 
nullptr;
 
  561       objClass = (
static_cast<TClonesArray*
>(objectPtr))->GetClass();
 
  563       objClass = objectPtr->IsA();
 
  568       B2FATAL(
"Cannot connect branch to datastore" << 
LogVar(
"branchName", branchName));
 
  572     tree->SetBranchAddress(branch->GetName(), &(entry.
object));
 
  573     if (storeEntries) storeEntries->push_back(&entry);
 
  586   TBranch* branch = 
m_tree->GetBranch(
"EventMetaData");
 
  587   char* address = branch->GetAddress();
 
  589   branch->SetAddress(&eventMetaData);
 
  592   int run = eventMetaData->
getRun();
 
  593   unsigned int event = eventMetaData->
getEvent();
 
  595   branch->SetAddress(address);
 
  600     TDirectory* dir = gDirectory;
 
  602     TFile* file = TFile::Open(parentPfn.c_str(), 
"READ");
 
  604     if (!file || !file->IsOpen()) {
 
  605       B2ERROR(
"Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?" 
  606               << 
LogVar(
"LFN", parentLfn) << 
LogVar(
"PFN", parentPfn));
 
  630     tree->SetBranchAddress(
"EventMetaData", &metaData);
 
  632     tree->GetBranch(
"EventMetaData")->GetEntry(entry);
 
  643   int experiment = eventMetaData->getExperiment();
 
  644   int run = eventMetaData->getRun();
 
  645   unsigned int event = eventMetaData->getEvent();
 
  647   std::string parentLfn = eventMetaData->getParentLfn();
 
  652     TTree* tree = 
nullptr;
 
  654       TDirectory* dir = gDirectory;
 
  655       B2DEBUG(30, 
"Opening parent file" << 
LogVar(
"PFN", parentPfn));
 
  656       TFile* file = TFile::Open(parentPfn.c_str(), 
"READ");
 
  658       if (!file || !file->IsOpen()) {
 
  659         B2ERROR(
"Couldn't open parent file " << parentPfn);
 
  668         tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
 
  677     if (entryNumber < 0) {
 
  678       B2ERROR(
"No event " << experiment << 
"/" << run << 
"/" << 
event << 
" in parent file " << parentPfn);
 
  684     tree->SetBranchAddress(
"EventMetaData", &parentMetaData);
 
  685     tree->GetEntry(entryNumber);
 
  687       entry->ptr = entry->object;
 
  709   B2INFO(
"Index file detected, scanning to generate event list.");
 
  713   auto* elist = 
new TEventList(
"parent_entrylist");
 
  716   TBranch* branch = 
m_tree->GetBranch(
"EventMetaData");
 
  717   auto* address = branch->GetAddress();
 
  719   branch->SetAddress(&eventMetaData);
 
  720   long nEntries = 
m_tree->GetEntries();
 
  724     int run = eventMetaData->
getRun();
 
  725     unsigned int event = eventMetaData->
getEvent();
 
  726     const std::string& newParentLfn = eventMetaData->
getParentLfn();
 
  728     if (parentLfn != newParentLfn) {
 
  735   branch->SetAddress(address);
 
  738     tree->SetEventList(elist);
 
  746     B2FATAL(entryOrigin << 
" in " << 
m_tree->GetFile()->GetName() << 
" does not contain required object " << name << 
", aborting.");
 
  747   } 
else if (fileChanged) {
 
  748     B2WARNING(entryOrigin << 
" in " << 
m_tree->GetFile()->GetName() << 
" does not contain object " << name <<
 
  749               " that was present in a previous entry.");
 
  758     bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
 
  759     TObject* copyOfPreviousVersion = 
nullptr;
 
  761       copyOfPreviousVersion = entry->object->Clone();
 
  763     entry->resetForGetEntry();
 
  765     entry->ptr = copyOfPreviousVersion;
 
  769   if (bytesRead <= 0) {
 
  770     const char* name = 
m_tree->GetCurrentFile() ? 
m_tree->GetCurrentFile()->GetName() : 
"<unknown>";
 
  771     B2FATAL(
"Could not read 'persistent' TTree #" << fileEntry << 
" in file " << name);
 
  776       bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
 
  779         auto* newObj = 
static_cast<Mergeable*
>(entry->object);
 
  780         newObj->
merge(oldObj);
 
  784       entry->ptr = entry->object;
 
  787       entry->recoverFromNullObject();
 
  788       entry->ptr = 
nullptr;
 
  795   if ((metaData.
getSite().find(
"bfe0") == 0) && (metaData.
getDate().compare(
"2019-06-30") < 0) &&
 
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
EDurability
Durability types.
@ c_Persistent
Object is available during entire execution time.
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
static DataStore & Instance()
Instance of singleton Store.
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g.
static Environment & Instance()
Static method to get a reference to the Environment instance.
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Abstract base class for objects that can be merged.
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_Input
This module is an input module (reads data).
Helper class to factorize some necessary tasks when working with Belle2 output files.
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Type-safe access to single objects in the data store.
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
Return the entry number with the given (event, run, experiment) from the tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
Wraps a stored array/object, stored under unique (name, durability) key.
TObject * object
The pointer to the actual object.