Belle II Software  release-08-01-10
RootInputModule.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 #include <framework/core/Module.h>
12 #include <framework/datastore/DataStore.h>
13 #include <framework/core/Environment.h>
14 #include <framework/dataobjects/FileMetaData.h>
15 #include <framework/dataobjects/EventMetaData.h>
16 
17 #include <string>
18 #include <vector>
19 #include <set>
20 
21 #include <TChain.h>
22 #include <TFile.h>
23 
24 
25 namespace Belle2 {
42  class RootInputModule : public Module {
43  public:
44 
47 
/** Destructor. */
49  virtual ~RootInputModule();
50 
/** Initialize the Module. */
52  virtual void initialize() override;
53 
/** Running over all events. */
55  virtual void event() override;
56 
/** Is called at the end of your Module. */
58  virtual void terminate() override;
59 
61  virtual std::vector<std::string> getFileNames(bool outputFiles = false) override
62  {
63  B2ASSERT("RootInput is not an output module", !outputFiles);
64  std::vector<std::string> inputFiles = !m_isSecondaryInput ? Environment::Instance().getInputFilesOverride() :
66  if (!m_ignoreCommandLineOverride and !inputFiles.empty()) {
67  return inputFiles;
68  }
69  inputFiles = m_inputFileNames;
70  if (!m_inputFileName.empty())
71  inputFiles.push_back(m_inputFileName);
72  return inputFiles;
73  }
74 
75  protected:
76 
77 
78  private:
/** Vector of entries in the data store. */
79  typedef std::vector<DataStore::StoreEntry*> StoreEntries;
/** Actually performs the reading from the tree. */
82  void readTree();
83 
/** Connect branches of the given tree to the data store. */
92  bool connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries);
93 
96 
/** Read data of the current event from the parents. */
98  bool readParentTrees();
99 
/** Loads given entry from persistent tree. */
101  void readPersistentEntry(long fileEntry);
102 
/** Check if we warn the user or abort after an entry was missing after changing files. */
104  void entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged = true);
105 
/** For index files, this creates TEventList/TEntryListArray to enable better cache use. */
107  void addEventListForIndexFile(const std::string& parentLfn);
108 
/** Correct isMC flag for raw data recorded before experiment 8 run 2364. */
110  void realDataWorkaround(FileMetaData& metaData);
111 
112  //first the steerable variables:
/** File to read from. */
114  std::string m_inputFileName;
115 
/** Files to read from. */
117  std::vector<std::string> m_inputFileNames;
118 
/** Number sequences; NOTE(review): the full format description is truncated in this listing. */
122  std::vector<std::string> m_entrySequences;
123 
126 
/** Array for names of branches, that shall be written out; one vector per durability type. */
133  std::vector<std::string> m_branchNames[DataStore::c_NDurabilityTypes];
134 
141 
142 
/** Can be set from steering file to skip the first N events. */
144  unsigned int m_skipNEvents;
145 
148 
151 
/** Experiment, run, event number of first event to load. */
153  std::vector<int> m_skipToEvent;
154 
155  //then those for purely internal use:
156 
159 
162 
/** Last parent file LFN seen. */
164  std::string m_lastParentFileLFN;
165 
166 
/** TTree for event input. */
168  TChain* m_tree;
169 
/** TTree for persistent input. */
171  TChain* m_persistent;
172 
175 
180 
/** The parent DataStore entries per level. */
182  std::vector<StoreEntries> m_parentStoreEntries;
183 
/** Map of file LFNs to trees. */
185  std::map<std::string, TTree*> m_parentTrees;
186 
188  struct ReadStats {
189  long calls{0};
190  long bytesRead{0};
191  long bytesReadExtra{0};
193  void add(const ReadStats& b)
194  {
195  calls += b.calls;
196  bytesRead += b.bytesRead;
197  bytesReadExtra += b.bytesReadExtra;
198  }
200  void addFromFile(const TFile* f)
201  {
202  calls += f->GetReadCalls();
203  bytesRead += f->GetBytesRead();
204  bytesReadExtra += f->GetBytesReadExtra();
205  }
207  std::string getString() const
208  {
209  std::string s;
210  s += "read: " + std::to_string(bytesRead) + " Bytes";
211  s += ", overhead: " + std::to_string(bytesReadExtra) + " Bytes";
212  s += ", Read() calls: " + std::to_string(calls);
213  return s;
214  }
215  };
216 
219 
/** Input ROOT File Cache size in MB, <0 means default. */
221  int m_cacheSize{0};
222 
227 
230 
/**
 * Set when this is a second RootInputModule used in an independent path.
 * getFileNames() uses the primary command line input override only when this is false.
 */
234  bool m_isSecondaryInput{false};
235  };
237 } // end namespace Belle2
static const int c_NDurabilityTypes
Number of Durability Types.
Definition: DataStore.h:63
EDurability
Durability types.
Definition: DataStore.h:58
const std::vector< std::string > & getInputFilesOverride() const
Return overridden input file names, or empty vector if none were set.
Definition: Environment.h:103
const std::vector< std::string > & getSecondaryInputFilesOverride() const
Return overridden secondary input file names, or empty vector if none were set.
Definition: Environment.h:109
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
@ c_HLTDiscard
The HLT discarded the event and only metadata is kept.
Definition: EventMetaData.h:48
Metadata information about a file.
Definition: FileMetaData.h:29
Base class for Modules.
Definition: Module.h:72
Module to read TTree data from file into the data store.
bool m_discardErrorEvents
Discard events that have an error flag != 0.
StoreEntries m_persistentStoreEntries
Vector of DataStore entries of persistent durability that we are supposed to read in.
void realDataWorkaround(FileMetaData &metaData)
Correct isMC flag for raw data recorded before experiment 8 run 2364.
std::map< std::string, TTree * > m_parentTrees
Map of file LFNs to trees.
unsigned int m_discardErrorMask
Don't issue a warning when discarding events if the error flag consists exclusively of flags in this ...
bool m_isSecondaryInput
When using a second RootInputModule in an independent path [usually if you are using add_independent_...
bool m_collectStatistics
Collect statistics on amount of data read and print statistics (separate for input & parent files) af...
long m_lastPersistentEntry
last entry to be in persistent tree.
StoreEntries m_storeEntries
Vector of DataStore entries of event durability that we are supposed to read in.
virtual void initialize() override
Initialize the Module.
TChain * m_tree
TTree for event input.
virtual void event() override
Running over all events.
int m_parentLevel
Level of parent files to be read.
std::vector< StoreEntries > m_parentStoreEntries
The parent DataStore entries per level.
virtual void terminate() override
Is called at the end of your Module.
std::string m_inputFileName
File to read from.
TChain * m_persistent
TTree for persistent input.
std::string m_lastParentFileLFN
last parent file LFN seen.
std::vector< std::string > m_inputFileNames
Files to read from.
void readPersistentEntry(long fileEntry)
Loads given entry from persistent tree.
virtual ~RootInputModule()
Destructor.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_cacheSize
Input ROOT File Cache size in MB, <0 means default.
long m_nextEntry
Next entry to be read in event tree.
std::set< std::string > m_connectedBranches[DataStore::c_NDurabilityTypes]
Already connected branches.
void entryNotFound(const std::string &entryOrigin, const std::string &name, bool fileChanged=true)
Check if we warn the user or abort after an entry was missing after changing files.
virtual std::vector< std::string > getFileNames(bool outputFiles=false) override
Get list of input files, taking -i command line overrides into account.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
bool createParentStoreEntries()
Connect the parent trees and fill m_parentStoreEntries.
void addEventListForIndexFile(const std::string &parentLfn)
For index files, this creates TEventList/TEntryListArray to enable better cache use.
RootInputModule()
Constructor.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
bool m_processingAllEvents
Set to true if we process the input files completely: No skip events or sequences or -n parameters.
std::vector< int > m_skipToEvent
experiment, run, event number of first event to load
ReadStats m_readStats
some statistics for all files read so far.
unsigned int m_skipNEvents
Can be set from steering file to skip the first N events.
void readTree()
Actually performs the reading from the tree.
bool readParentTrees()
Read data of the current event from the parents.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches, that shall be written out.
std::vector< std::string > m_entrySequences
The number sequences (e.g.
bool connectBranches(TTree *tree, DataStore::EDurability durability, StoreEntries *storeEntries)
Connect branches of the given tree to the data store.
Abstract base class for different kinds of events.
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
long calls
number of read calls.
void add(const ReadStats &b)
add other stats object.
void addFromFile(const TFile *f)
add current statistics from TFile object.
long bytesRead
total number of bytes read.
long bytesReadExtra
what TFile thinks was the overhead.