Belle II Software  release-06-00-14
RootInputModule.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 #include <framework/core/Module.h>
12 #include <framework/datastore/DataStore.h>
13 #include <framework/core/Environment.h>
14 #include <framework/dataobjects/FileMetaData.h>
15 #include <framework/dataobjects/EventMetaData.h>
16 
17 #include <string>
18 #include <vector>
19 #include <set>
20 
21 #include <TChain.h>
22 #include <TFile.h>
23 
24 
25 namespace Belle2 {
/** Module to read TTree data from file into the data store. */
42  class RootInputModule : public Module {
43  public:
44 
47 
/** Destructor. */
49  virtual ~RootInputModule();
50 
/** Initialize the Module. */
52  virtual void initialize() override;
53 
/** Running over all events. */
55  virtual void event() override;
56 
/** Is called at the end of your Module. */
58  virtual void terminate() override;
59 
61  virtual std::vector<std::string> getFileNames(bool outputFiles = false) override
62  {
63  B2ASSERT("RootInput is not an output module", !outputFiles);
64  std::vector<std::string> inputFiles = Environment::Instance().getInputFilesOverride();
65  if (!m_ignoreCommandLineOverride and !inputFiles.empty()) {
66  return inputFiles;
67  }
68  inputFiles = m_inputFileNames;
69  if (!m_inputFileName.empty())
70  inputFiles.push_back(m_inputFileName);
71  return inputFiles;
72  }
73 
74  protected:
75 
76 
77  private:
/** Vector of entries in the data store. */
78  typedef std::vector<DataStore::StoreEntry*> StoreEntries;
/** Actually performs the reading from the tree. */
81  void readTree();
82 
/**
 * Connect branches of the given tree to the data store.
 *
 * @param tree         tree whose branches are connected.
 * @param durability   durability type the branches belong to.
 * @param storeEntries NOTE(review): presumably receives the connected
 *                     DataStore entries - confirm against the implementation.
 * @return NOTE(review): meaning of the bool is not visible in this header - confirm.
 */
91  bool connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries);
92 
95 
/** Read data of the current event from the parents. */
97  bool readParentTrees();
98 
/** Loads given entry from persistent tree. */
100  void readPersistentEntry(long fileEntry);
101 
/** Check if we warn the user or abort after an entry was missing after changing files. */
103  void entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged = true);
104 
/** For index files, this creates TEventList/TEntryListArray to enable better cache use. */
106  void addEventListForIndexFile(const std::string& parentLfn);
107 
/** Correct isMC flag for raw data recorded before experiment 8 run 2364. */
109  void realDataWorkaround(FileMetaData& metaData);
110 
111  //first the steerable variables:
/** File to read from. */
113  std::string m_inputFileName;
114 
/** Files to read from. */
116  std::vector<std::string> m_inputFileNames;
117 
/**
 * The number sequences.
 * NOTE(review): the original description is truncated in this listing - confirm the expected format.
 */
121  std::vector<std::string> m_entrySequences;
122 
125 
/**
 * Array for names of branches, that shall be written out (one list per durability type).
 * NOTE(review): the wording says "written out" although this is the input module - confirm
 * whether "read in" is meant.
 */
132  std::vector<std::string> m_branchNames[DataStore::c_NDurabilityTypes];
133 
140 
141 
/** Can be set from steering file to skip the first N events. */
143  unsigned int m_skipNEvents;
144 
147 
150 
/** experiment, run, event number of first event to load */
152  std::vector<int> m_skipToEvent;
153 
154  //then those for purely internal use:
155 
158 
161 
/** last parent file LFN seen. */
163  std::string m_lastParentFileLFN;
164 
165 
/** TTree for event input. */
167  TChain* m_tree;
168 
/** TTree for persistent input. */
170  TChain* m_persistent;
171 
174 
179 
/** The parent DataStore entries per level. */
181  std::vector<StoreEntries> m_parentStoreEntries;
182 
/**
 * Map of file LFNs to trees.
 * NOTE(review): std::map is used here but <map> is not among the includes visible
 * in this header - presumably pulled in transitively; consider including it directly.
 */
184  std::map<std::string, TTree*> m_parentTrees;
185 
187  struct ReadStats {
188  long calls{0};
189  long bytesRead{0};
190  long bytesReadExtra{0};
192  void add(const ReadStats& b)
193  {
194  calls += b.calls;
195  bytesRead += b.bytesRead;
196  bytesReadExtra += b.bytesReadExtra;
197  }
199  void addFromFile(const TFile* f)
200  {
201  calls += f->GetReadCalls();
202  bytesRead += f->GetBytesRead();
203  bytesReadExtra += f->GetBytesReadExtra();
204  }
206  std::string getString() const
207  {
208  std::string s;
209  s += "read: " + std::to_string(bytesRead) + " Bytes";
210  s += ", overhead: " + std::to_string(bytesReadExtra) + " Bytes";
211  s += ", Read() calls: " + std::to_string(calls);
212  return s;
213  }
214  };
215 
218 
/** Input ROOT File Cache size in MB, <0 means default. */
220  int m_cacheSize{0};
221 
226 
229  };
231 } // end namespace Belle2
static const int c_NDurabilityTypes
Number of Durability Types.
Definition: DataStore.h:63
EDurability
Durability types.
Definition: DataStore.h:58
const std::vector< std::string > & getInputFilesOverride() const
Return overridden input file names, or empty vector if none were set.
Definition: Environment.h:103
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:29
@ c_HLTDiscard
The HLT discarded the event and only metadata is kept.
Definition: EventMetaData.h:48
Metadata information about a file.
Definition: FileMetaData.h:29
Base class for Modules.
Definition: Module.h:72
Module to read TTree data from file into the data store.
bool m_discardErrorEvents
Discard events that have an error flag != 0.
StoreEntries m_persistentStoreEntries
Vector of DataStore entries of persistent durability that we are supposed to read in.
void realDataWorkaround(FileMetaData &metaData)
Correct isMC flag for raw data recorded before experiment 8 run 2364.
std::map< std::string, TTree * > m_parentTrees
Map of file LFNs to trees.
unsigned int m_discardErrorMask
Don't issue a warning when discarding events if the error flag consists exclusively of flags in this ...
bool m_collectStatistics
Collect statistics on amount of data read and print statistics (separate for input & parent files) af...
long m_lastPersistentEntry
last entry to be in persistent tree.
StoreEntries m_storeEntries
Vector of DataStore entries of event durability that we are supposed to read in.
virtual void initialize() override
Initialize the Module.
TChain * m_tree
TTree for event input.
virtual void event() override
Running over all events.
int m_parentLevel
Level of parent files to be read.
std::vector< StoreEntries > m_parentStoreEntries
The parent DataStore entries per level.
virtual void terminate() override
Is called at the end of your Module.
std::string m_inputFileName
File to read from.
TChain * m_persistent
TTree for persistent input.
std::string m_lastParentFileLFN
last parent file LFN seen.
std::vector< std::string > m_inputFileNames
Files to read from.
void readPersistentEntry(long fileEntry)
Loads given entry from persistent tree.
virtual ~RootInputModule()
Destructor.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_cacheSize
Input ROOT File Cache size in MB, <0 means default.
long m_nextEntry
Next entry to be read in event tree.
std::set< std::string > m_connectedBranches[DataStore::c_NDurabilityTypes]
Already connected branches.
void entryNotFound(const std::string &entryOrigin, const std::string &name, bool fileChanged=true)
Check if we warn the user or abort after an entry was missing after changing files.
virtual std::vector< std::string > getFileNames(bool outputFiles=false) override
Get list of input files, taking -i command line overrides into account.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
bool createParentStoreEntries()
Connect the parent trees and fill m_parentStoreEntries.
void addEventListForIndexFile(const std::string &parentLfn)
For index files, this creates TEventList/TEntryListArray to enable better cache use.
RootInputModule()
Constructor.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
bool m_processingAllEvents
Set to true if we process the input files completely: No skip events or sequences or -n parameters.
std::vector< int > m_skipToEvent
experiment, run, event number of first event to load
ReadStats m_readStats
some statistics for all files read so far.
unsigned int m_skipNEvents
Can be set from steering file to skip the first N events.
void readTree()
Actually performs the reading from the tree.
bool readParentTrees()
Read data of the current event from the parents.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches, that shall be written out.
std::vector< std::string > m_entrySequences
The number sequences (e.g.
bool connectBranches(TTree *tree, DataStore::EDurability durability, StoreEntries *storeEntries)
Connect branches of the given tree to the data store.
Abstract base class for different kinds of events.
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
long calls
number of read calls.
void add(const ReadStats &b)
add other stats object.
void addFromFile(const TFile *f)
add current statistics from TFile object.
long bytesRead
total number of bytes read.
long bytesReadExtra
what TFile thinks was the overhead.