Belle II Software  release-08-01-10
RootInputModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 
10 #include <framework/modules/rootio/RootInputModule.h>
11 
12 #include <framework/io/RootIOUtilities.h>
13 #include <framework/io/RootFileInfo.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/InputController.h>
16 #include <framework/pcore/Mergeable.h>
17 #include <framework/datastore/StoreObjPtr.h>
18 #include <framework/datastore/DataStore.h>
19 #include <framework/datastore/DependencyMap.h>
20 #include <framework/dataobjects/EventMetaData.h>
21 #include <framework/utilities/NumberSequence.h>
22 #include <framework/utilities/ScopeGuard.h>
23 #include <framework/database/Configuration.h>
24 
25 #include <TClonesArray.h>
26 #include <TEventList.h>
27 #include <TObjArray.h>
28 #include <TChainElement.h>
29 #include <TError.h>
30 
31 #include <iomanip>
32 
33 using namespace std;
34 using namespace Belle2;
35 using namespace RootIOUtilities;
36 
37 REG_MODULE(RootInput);
38 
// Constructor: initialises the entry counters (m_nextEntry = 0, m_lastPersistentEntry = -1)
// and the two tree pointers (both nullptr), sets the module description and declares the
// steering parameters of the module via addParam(name, member, description, default).
39 RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
40 {
41  //Set module properties
42  setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
44 
45  //Parameter definition
// shared empty default for all list-valued parameters declared below
46  vector<string> emptyvector;
47  addParam("inputFileName", m_inputFileName,
48  "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
49  string(""));
50  addParam("inputFileNames", m_inputFileNames,
51  "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
52  emptyvector);
// NOTE(review): the three adjacent string literals of this description are concatenated
// without a separating space, so the sentences run together in the resulting text.
53  addParam("entrySequences", m_entrySequences,
54  "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55  "Must be specified exactly once for each file to be opened."
56  "The first event has the entry number 0.", emptyvector);
57  addParam("ignoreCommandLineOverride", m_ignoreCommandLineOverride,
58  "Ignore override of file name via command line argument -i.", false);
59 
60  addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
// the member itself is passed in the last argument position, where the other calls pass their default
61  addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
62  "the specified (experiment, run, event number) occurs. This parameter "
63  "is useful for debugging to start with a specific event.", m_skipToEvent);
64 
// NOTE(review): the opening addParam(...) lines of the next four declarations are not visible
// in this listing; only their description strings and defaults remain. Judging from the
// descriptions they presumably declare the branch selection and exclusion lists for event
// and persistent durability -- confirm against the repository before editing.
66  "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
68  "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
69 
71  "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
// default exclusion list for the persistent tree: only "ProcessStatistics"
72  vector<string> excludePersistent({"ProcessStatistics"});
74  "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
75 
76  addParam("parentLevel", m_parentLevel,
77  "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
78  0);
79 
// NOTE(review): "seperate" in the description below is a spelling slip in a user-visible string.
80  addParam("collectStatistics", m_collectStatistics,
81  "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
82  false);
83  addParam("cacheSize", m_cacheSize,
84  "file cache size in Mbytes. If negative, use root default", 0);
85 
86  addParam("discardErrorEvents", m_discardErrorEvents,
87  "Discard events with an error flag != 0", m_discardErrorEvents);
// NOTE(review): the parameter name is spelled with three 'r' ("silentErrrorDiscardMask").
// It is the name the parameter is registered under, so renaming it would presumably break
// existing steering files -- confirm before touching it.
88  addParam("silentErrrorDiscardMask", m_discardErrorMask,
89  "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90  "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
91  "present in this mask", m_discardErrorMask);
92 
93  addParam("isSecondaryInput", m_isSecondaryInput,
94  "When using a second RootInputModule in an independent path [usually if you are using add_independent_merge_path(...)] "
95  "this has to be set to true",
96  false);
97 }
98 
100 
102 {
103  unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
104  if (skipNEventsOverride != 0)
105  m_skipNEvents = skipNEventsOverride;
106 
107  auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
108  if (entrySequencesOverride.size() > 0)
109  m_entrySequences = entrySequencesOverride;
110 
113  m_lastParentFileLFN = "";
114 
115  const vector<string>& inputFiles = getFileNames();
116  if (inputFiles.empty()) {
117  B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
118  }
119  if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
120  B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
121  }
123  if (m_inputFileNames.empty()) {
124  B2FATAL("No valid files specified!");
125  }
126 
127  if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
128  B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
129  << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
130  }
131 
132  m_inputFileName = "";
133  // we'll only use m_inputFileNames from now on
134 
135  // so let's create the chain objects ...
136  m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
137  m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
138 
139  // time for sanity checks. The problem is that this needs to read a few bytes
140  // from every input file so for jobs with large amount of input files this
141  // will not be efficient.
142  // TODO: We might want to create a different input module which will not
143  // check anything and require manual input like the number of events in
144  // each file and the global tags to use. That would be more efficient
145  // but also less safe
146 
147  // list of required branches. We keep this empty for now and only fill
148  // it after we checked the first file to make sure all other files have the
149  // same branches available.
150  std::set<std::string> requiredEventBranches;
151  std::set<std::string> requiredPersistentBranches;
152  // Event metadata from all files, keep it around for sanity checks and globaltag replay
153  std::vector<FileMetaData> fileMetaData;
154  // running sum of the MC events reported by the metadata of the input files
155  std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
156 
157  // scope for local variables
158  {
159  // temporarily disable some root warnings
160  auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
161  // do all files have a consistent number of MC events? that is all positive or all zero
162  bool validInputMCEvents{true};
163  for (const string& fileName : m_inputFileNames) {
164  // read metadata and create sum of MCEvents and global tags
165  try {
166  RootIOUtilities::RootFileInfo fileInfo(fileName);
167  FileMetaData meta = fileInfo.getFileMetaData();
168  if (meta.getNEvents() == 0) {
169  B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
170  continue;
171  }
172  realDataWorkaround(meta);
173  fileMetaData.push_back(meta);
174  // make sure we only look at data or MC. For the first file this is trivially true
175  if (fileMetaData.front().isMC() != meta.isMC()) {
176  throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
177  }
178  // accumulate number of inputMCEvents now
179  if (validInputMCEvents) {
180  // make sure that all files have either a non-zero or zero mcevents.
181  if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
182  B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
183  validInputMCEvents = false;
184  }
185  // So accumulate the number of MCEvents but let's be careful to not have an overflow here
186  if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
187  B2FATAL("Number of MC events is too large and cannot be represented anymore");
188  }
189  }
190  // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
191  if (requiredEventBranches.empty()) {
192  // make sure we have event meta data
193  fileInfo.checkMissingBranches({"EventMetaData"}, false);
194  requiredEventBranches = fileInfo.getBranchNames(false);
195  // filter the branches depending on what the user selected. Note we
196  // do the same thing again in connectBranches but we leave it like
197  // that because we also want to read branches from parent files
198  // selectively and thus we need to filter the branches there anyway.
199  // Here we just do it to ensure all files we read directly (which is
200  // 99% of the use case) contain all the branches we want.
201  requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
203  // but make sure we always have EventMetaData ...
204  requiredEventBranches.emplace("EventMetaData");
205 
206  // Same for persistent data ...
207  requiredPersistentBranches = fileInfo.getBranchNames(true);
208  // filter the branches depending on what the user selected
209  requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
211  } else {
212  // ok we already have the list ... so let's make sure following files have the same branches
213  fileInfo.checkMissingBranches(requiredEventBranches, false);
214  fileInfo.checkMissingBranches(requiredPersistentBranches, true);
215  }
216  // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
217  if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
218  throw std::runtime_error("Could not add file to TChain");
219  }
220  B2INFO("Added file " + fileName);
221  } catch (std::exception& e) {
222  B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
223  }
224  }
225  }
226 
227  if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
228  // Set cache size TODO: find out if files are remote and use a bigger default
229  // value if at least one file is non-local
230  if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
231 
232  // Check if the files we added to the Chain are unique,
233  // if the same file is added multiple times the TEventList used for the eventSequence feature
234  // will process each file only once with the union of both given sequences.
235  // It is not clear if the user wants this, so we raise a fatal in this situation.
236  {
237  std::set<std::string> unique_filenames;
238 
239  // The following lines are directly from the ROOT documentation
240  // see TChain::AddFile
241  TObjArray* fileElements = m_tree->GetListOfFiles();
242  TIter next(fileElements);
243  TChainElement* chEl = nullptr;
244  while ((chEl = (TChainElement*)next())) {
245  if (!unique_filenames.insert(chEl->GetTitle()).second) {
246  B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
247  // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
248  m_processingAllEvents = false;
249  }
250  }
251  if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
252  B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
253  "Please specify each file only once if you're using the sequence feature!");
254  }
255  }
256 
257  if (m_entrySequences.size() > 0) {
258  auto* elist = new TEventList("input_event_list");
259  for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
260  int64_t offset = m_tree->GetTreeOffset()[iFile];
261  int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
262  // check if Sequence consists only of ':', e.g. the whole file is requested
263  if (m_entrySequences[iFile] == ":") {
264  for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
265  elist->Enter(global_entry);
266  } else {
267  for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
268  int64_t global_entry = entry + offset;
269  if (global_entry >= next_offset) {
270  B2WARNING("Given sequence contains entry numbers which are out of range. "
271  "I won't add any further events to the EventList for the current file.");
272  break;
273  } else {
274  elist->Enter(global_entry);
275  }
276  }
277  }
278  }
279  m_tree->SetEventList(elist);
280  }
281 
282  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
283  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
284 
287 
289  delete m_tree;
290  m_tree = nullptr; //don't try to read from there
291  } else {
294  }
295 
296  if (m_parentLevel > 0) {
298  } else if (m_parentLevel < 0) {
299  B2ERROR("parentLevel must be >= 0!");
300  return;
301  }
302 
303  // Let's check if we process everything
304  // * all filenames unique (already done above)
305  // * no event skipping either with skipN, entry sequences or skipToEvent
306  // * no -n or process(path, N) with N <= the number of entries in our files
307  unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
309  m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries(m_isSecondaryInput));
310 
311  if (!m_skipToEvent.empty()) {
312  // Skipping to some specific event is also not processing all events ...
313  m_processingAllEvents = false;
314  // make sure the number of entries is exactly 3
315  if (m_skipToEvent.size() != 3) {
316  B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
317  // ignore the value
318  m_skipToEvent.clear();
319  } else {
321  }
322  if (m_nextEntry > 0) {
323  B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
324  "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
325  //force the number of skipped events to be zero
326  m_nextEntry = 0;
327  }
328  }
329 
330  // Tell the InputController which event will be processed first
331  // (important if we want to do event mixing and skip some events in the input)
332  if (m_nextEntry > 0) {
334  }
335 
336  // Processing everything so forward number of MC events
337  if (m_processingAllEvents) {
338  Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
339  }
340  // And setup global tag replay ...
342 }
343 
344 
346 {
347  if (!m_tree)
348  return;
349 
350  while (true) {
351  const long nextEntry = InputController::getNextEntry(m_isSecondaryInput);
352  if (nextEntry >= 0 && nextEntry < InputController::numEntries(m_isSecondaryInput)) {
353  // don't show this message if we are doing event merging, as it will pop up twice for every event
355  B2INFO("RootInput: will read entry " << nextEntry << " next.");
356  }
357  m_nextEntry = nextEntry;
359  && InputController::getNextEvent() >= 0) {
362  if (entry >= 0) {
363  const long chainentry = m_tree->GetChainEntryNumber(entry);
364  B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
365  m_nextEntry = chainentry;
366  } else {
367  B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
368  InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
369  }
370  }
372 
373  readTree();
374  m_nextEntry++;
375 
376  // check for events with errors
377  unsigned int errorFlag = 0;
378  if (m_discardErrorEvents && (m_nextEntry >= 0)) {
379  const StoreObjPtr<EventMetaData> eventMetaData;
380  errorFlag = eventMetaData->getErrorFlag();
381  if (errorFlag != 0) {
382  if (errorFlag & ~m_discardErrorMask) {
383  B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
384  << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
385  }
386  // make sure this event is not used if it's the last one in the file
387  eventMetaData->setEndOfData();
388  }
389  }
390  if (errorFlag == 0) break;
391  }
392 }
393 
394 
396 {
397  if (m_collectStatistics and m_tree) {
398  //add stats for last file
399  m_readStats.addFromFile(m_tree->GetFile());
400  }
401  delete m_tree;
402  delete m_persistent;
403  ReadStats parentReadStats;
404  for (const auto& entry : m_parentTrees) {
405  TFile* f = entry.second->GetCurrentFile();
407  parentReadStats.addFromFile(f);
408 
409  delete f;
410  }
411 
412  if (m_collectStatistics) {
413  B2INFO("Statistics for event tree: " << m_readStats.getString());
414  B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
415  }
416 
417  for (auto& branch : m_connectedBranches) {
418  branch.clear();
419  }
420  m_storeEntries.clear();
421  m_persistentStoreEntries.clear();
422  m_parentStoreEntries.clear();
423  m_parentTrees.clear();
424 }
425 
426 
428 {
429  if (!m_tree)
430  return;
431 
432  //keep snapshot of TFile stats (to use if it changes)
433  ReadStats currentEventStats;
434  if (m_collectStatistics) {
435  currentEventStats.addFromFile(m_tree->GetFile());
436  }
437 
438  // Check if there are still new entries available.
439  int localEntryNumber = m_nextEntry;
440  if (m_entrySequences.size() > 0) {
441  localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
442  }
443  localEntryNumber = m_tree->LoadTree(localEntryNumber);
444 
445  if (localEntryNumber == -2) {
446  m_nextEntry = -2;
447  return; //end of file
448  } else if (localEntryNumber < 0) {
449  B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
450  LogVar("error", localEntryNumber));
451  }
452  B2DEBUG(39, "Reading file entry " << m_nextEntry);
453 
454  //Make sure transient members of objects are reinitialised
455  for (auto entry : m_storeEntries) {
456  entry->resetForGetEntry();
457  }
458  for (const auto& storeEntries : m_parentStoreEntries) {
459  for (auto entry : storeEntries) {
460  entry->resetForGetEntry();
461  }
462  }
463 
464  int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
465  if (bytesRead <= 0) {
466  B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
467  }
468 
469  //In case someone is tempted to change this:
470  // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
471  // calling GetName() on the TFile almost works as expected, but starts with the
472  // last file in a TChain. (-> we re-read the first persistent tree with TChain,
473  // with ill results for Mergeable objects.)
474  // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
475  // so we'll save the last persistent tree loaded and only reload on changes.
477  const long treeNum = m_tree->GetTreeNumber();
478  const bool fileChanged = (m_lastPersistentEntry != treeNum);
479  if (fileChanged) {
480  if (m_collectStatistics) {
481  m_readStats.add(currentEventStats);
482  }
483  // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
484  readPersistentEntry(treeNum);
485  B2INFO("Loading new input file"
486  << LogVar("filename", m_tree->GetFile()->GetName())
487  << LogVar("metadata LFN", fileMetaData->getLfn()));
488  }
489  realDataWorkaround(*fileMetaData);
490 
491  for (auto entry : m_storeEntries) {
492  if (!entry->object) {
493  entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
494  entry->recoverFromNullObject();
495  entry->ptr = nullptr;
496  } else {
497  entry->ptr = entry->object;
498  }
499  }
500 
501  if (m_parentLevel > 0) {
502  if (!readParentTrees())
503  B2FATAL("Could not read data from parent file!");
504  }
505 
506  // Nooow, if the object didn't exist in the event when we wrote it to File we still have it in the file but it's marked as invalid Object.
507  // So go through everything and check for the bit and invalidate as necessary
508  for (auto entry : m_storeEntries) {
509  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
510  }
511  for (const auto& storeEntries : m_parentStoreEntries) {
512  for (auto entry : storeEntries) {
513  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
514  }
515  }
516 }
517 
// Connects branches of `tree` to DataStore entries of the given durability.
//
// All branches are first disabled; a branch is then enabled and connected if it survives
// filterBranches() with the user's include/exclude lists, or if it is "FileMetaData" on
// m_persistent or "EventMetaData" on m_tree. Branches already recorded in
// m_connectedBranches[durability] are skipped. For each connected branch the class of the
// stored object is determined by reading an entry into a temporary pointer, a DataStore
// entry is registered, and the branch address is pointed at that entry's `object` member.
//
// tree         tree whose branches are connected
// durability   durability under which the entries are registered
// storeEntries if non-null, a pointer to every newly connected entry is appended
// Returns true on every path visible in this listing.
518 bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
519 {
// NOTE(review): this message does not describe what the function does -- confirm whether it is intended here.
520  B2DEBUG(30, "File changed, loading persistent data.");
// NOTE(review): the declaration of `map` (used further down to look up the registered entry)
// is not visible in this listing (Doxygen line 521 is missing); presumably it is the
// DataStore's StoreEntryMap for `durability` -- confirm against the repository.
522  //Go over the branchlist and connect the branches with DataStore entries
523  const TObjArray* branchesObjArray = tree->GetListOfBranches();
524  if (!branchesObjArray) {
525  B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
526  }
// collect the non-null branches and their names in one pass
527  std::vector<TBranch*> branches;
528  set<string> branchList;
529  for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
530  auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
531  if (!branch) continue;
532  branchList.insert(branch->GetName());
533  branches.emplace_back(branch);
534  // start with all branches disabled and only enable what we read
535  setBranchStatus(branch, false);
536  }
537  //skip branches the user doesn't want
538  branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
539  for (TBranch* branch : branches) {
540  const std::string branchName = branch->GetName();
541  //skip already connected branches
542  if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
543  continue;
544 
// a branch that was filtered out is still connected if it is FileMetaData on the
// persistent chain or EventMetaData on the event chain
545  if ((branchList.count(branchName) == 0) and
546  ((branchName != "FileMetaData") || (tree != m_persistent)) and
547  ((branchName != "EventMetaData") || (tree != m_tree))) {
548  continue;
549  }
550  auto found = setBranchStatus(branch, true);
551  B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
552  << LogVar("children found", found));
553 
554  //Get information about the object in the branch
// read an entry through a temporary pointer so the class of the stored object can be inspected
555  TObject* objectPtr = nullptr;
556  branch->SetAddress(&objectPtr);
557  branch->GetEntry();
558  bool array = (string(branch->GetClassName()) == "TClonesArray");
559  TClass* objClass = nullptr;
// NOTE(review): objectPtr is dereferenced below without a null check -- confirm that
// GetEntry() always provides an object here.
560  if (array)
561  objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
562  else
563  objClass = objectPtr->IsA();
// the temporary object was only needed to find the class
564  delete objectPtr;
565 
566  //Create a DataStore entry and connect the branch address to it
567  if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
568  B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
// NOTE(review): presumably unreachable if B2FATAL aborts -- confirm.
569  continue;
570  }
571  DataStore::StoreEntry& entry = (map.find(branchName))->second;
572  tree->SetBranchAddress(branch->GetName(), &(entry.object));
573  if (storeEntries) storeEntries->push_back(&entry);
574 
575  //Keep track of already connected branches
576  m_connectedBranches[durability].insert(branchName);
577  }
578 
579  return true;
580 }
581 
582 
584 {
585  // get the experiment/run/event number and parentLfn of the first entry
586  assert(m_tree);
587  TBranch* branch = m_tree->GetBranch("EventMetaData");
588  char* address = branch->GetAddress();
589  EventMetaData* eventMetaData = nullptr;
590  branch->SetAddress(&eventMetaData);
591  branch->GetEntry(0);
592  int experiment = eventMetaData->getExperiment();
593  int run = eventMetaData->getRun();
594  unsigned int event = eventMetaData->getEvent();
595  std::string parentLfn = eventMetaData->getParentLfn();
596  branch->SetAddress(address);
597 
598  // loop over parents and get their metadata
599  for (int level = 0; level < m_parentLevel; level++) {
600  // open the parent file
601  TDirectory* dir = gDirectory;
602  const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
603  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
604  dir->cd();
605  if (!file || !file->IsOpen()) {
606  B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
607  << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
608  return false;
609  }
610 
611  // get the event tree and connect its branches
612  auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
613  if (!tree) {
614  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
615  return false;
616  }
617  if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
619  m_parentTrees.insert(std::make_pair(parentLfn, tree));
620 
621  // get the persistent tree and read its branches
622  auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
623  if (!persistent) {
624  B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
625  return false;
626  }
627  connectBranches(persistent, DataStore::c_Persistent, nullptr);
628 
629  // get parent LFN of parent
630  EventMetaData* metaData = nullptr;
631  tree->SetBranchAddress("EventMetaData", &metaData);
632  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
633  tree->GetBranch("EventMetaData")->GetEntry(entry);
634  parentLfn = metaData->getParentLfn();
635  }
636 
637  return true;
638 }
639 
640 
642 {
643  const StoreObjPtr<EventMetaData> eventMetaData;
644  int experiment = eventMetaData->getExperiment();
645  int run = eventMetaData->getRun();
646  unsigned int event = eventMetaData->getEvent();
647 
648  std::string parentLfn = eventMetaData->getParentLfn();
649  for (int level = 0; level < m_parentLevel; level++) {
650  const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
651 
652  // Open the parent file if we haven't done this already
653  TTree* tree = nullptr;
654  if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
655  TDirectory* dir = gDirectory;
656  B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
657  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
658  dir->cd();
659  if (!file || !file->IsOpen()) {
660  B2ERROR("Couldn't open parent file " << parentPfn);
661  return false;
662  }
663  tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
664  if (!tree) {
665  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
666  return false;
667  }
668  for (auto entry : m_parentStoreEntries[level]) {
669  tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
670  }
671  m_parentTrees.insert(std::make_pair(parentLfn, tree));
672  } else {
673  tree = m_parentTrees[parentLfn];
674  }
675 
676  // get entry number in parent tree
677  long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
678  if (entryNumber < 0) {
679  B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
680  return false;
681  }
682 
683  // read the tree and mark the data read in the data store
684  EventMetaData* parentMetaData = nullptr;
685  tree->SetBranchAddress("EventMetaData", &parentMetaData);
686  tree->GetEntry(entryNumber);
687  for (auto entry : m_parentStoreEntries[level]) {
688  entry->ptr = entry->object;
689  }
690 
691  // set the parent LFN to the next level
692  parentLfn = parentMetaData->getParentLfn();
693  }
694 
695  addEventListForIndexFile(parentLfn);
696 
697  return true;
698 }
699 
700 void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
701 {
702  //is this really an index file? (=only EventMetaData stored)
703  if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
704  return;
705  //did we handle the current parent file already?
706  if (parentLfn == m_lastParentFileLFN)
707  return;
708  m_lastParentFileLFN = parentLfn;
709 
710  B2INFO("Index file detected, scanning to generate event list.");
711  TTree* tree = m_parentTrees.at(parentLfn);
712 
713  //both types of list work, TEventList seems to result in slightly less data being read.
714  auto* elist = new TEventList("parent_entrylist");
715  //TEntryListArray* elist = new TEntryListArray();
716 
717  TBranch* branch = m_tree->GetBranch("EventMetaData");
718  auto* address = branch->GetAddress();
719  EventMetaData* eventMetaData = nullptr;
720  branch->SetAddress(&eventMetaData);
721  long nEntries = m_tree->GetEntries();
722  for (long i = m_nextEntry; i < nEntries; i++) {
723  branch->GetEntry(i);
724  int experiment = eventMetaData->getExperiment();
725  int run = eventMetaData->getRun();
726  unsigned int event = eventMetaData->getEvent();
727  const std::string& newParentLfn = eventMetaData->getParentLfn();
728 
729  if (parentLfn != newParentLfn) {
730  //parent file changed, stopping for now
731  break;
732  }
733  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
734  elist->Enter(entry);
735  }
736  branch->SetAddress(address);
737 
738  if (tree) {
739  tree->SetEventList(elist);
740  //tree->SetEntryList(elist);
741  }
742 }
743 
744 void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
745 {
746  if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
747  B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
748  } else if (fileChanged) {
749  B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
750  " that was present in a previous entry.");
751  }
752 }
753 
755 {
756  m_lastPersistentEntry = fileEntry;
757 
758  for (auto entry : m_persistentStoreEntries) {
759  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
760  TObject* copyOfPreviousVersion = nullptr;
761  if (isMergeable) {
762  copyOfPreviousVersion = entry->object->Clone();
763  }
764  entry->resetForGetEntry();
765  //ptr stores old value (or nullptr)
766  entry->ptr = copyOfPreviousVersion;
767  }
768 
769  int bytesRead = m_persistent->GetEntry(fileEntry);
770  if (bytesRead <= 0) {
771  const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
772  B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
773  }
774 
775  for (auto entry : m_persistentStoreEntries) {
776  if (entry->object) {
777  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
778  if (isMergeable) {
779  const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
780  auto* newObj = static_cast<Mergeable*>(entry->object);
781  newObj->merge(oldObj);
782 
783  delete entry->ptr;
784  }
785  entry->ptr = entry->object;
786  } else {
787  entryNotFound("Persistent tree", entry->name);
788  entry->recoverFromNullObject();
789  entry->ptr = nullptr;
790  }
791  }
792 }
793 
795 {
796  if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
797  (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
798  metaData.declareRealData();
799  }
800 }
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
Definition: DataStore.h:70
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
Definition: DataStore.h:325
EDurability
Durability types.
Definition: DataStore.h:58
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:54
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
Definition: DataStore.cc:190
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:87
@ c_Input
required input.
Definition: DependencyMap.h:33
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
Definition: Environment.h:88
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g.
Definition: Environment.h:72
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
Definition: Environment.h:66
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
Definition: Environment.h:97
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
int getExperiment() const
Experiment Getter.
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
Definition: FileCatalog.cc:23
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Definition: FileCatalog.cc:180
Metadata information about a file.
Definition: FileMetaData.h:29
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:53
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:294
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:95
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:49
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:61
unsigned int getMcEvents() const
Number of generated events getter.
Definition: FileMetaData.h:123
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:99
static long getNextRun()
Return run number set via setNextEntry().
static void setNextEntry(long entry, bool independentPath=false)
Set the file entry to be loaded the next time event() is called.
static long getNextExperiment()
Return experiment number set via setNextEntry().
static bool getEventMerging()
Get if we are merging events from two paths.
static void eventLoaded(long entry, bool independentPath=false)
Indicate that an event (in the given entry) was loaded and reset all members related to the next entr...
static long numEntries(bool independentPath=false)
Returns total number of entries in the event tree.
static long getNextEntry(bool independentPath=false)
Return entry number set via setNextEntry().
static void setSkippedEntries(long entries, bool independentPath=false)
set the number of entries skipped by the RootInputModule.
static void setChain(const TChain *chain, bool independentPath=false)
Set the loaded TChain (event durability).
static void setCanControlInput(bool on)
Call this function from supported input modules.
static long getNextEvent()
Return event number set via setNextEntry().
Abstract base class for objects that can be merged.
Definition: Mergeable.h:31
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_Input
This module is an input module (reads data).
Definition: Module.h:78
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:27
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
Definition: RootFileInfo.cc:41
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:75
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:55
bool m_discardErrorEvents
Discard events that have an error flag != 0.
StoreEntries m_persistentStoreEntries
Vector of DataStore entries of persistent durability that we are supposed to read in.
void realDataWorkaround(FileMetaData &metaData)
Correct isMC flag for raw data recorded before experiment 8 run 2364.
std::map< std::string, TTree * > m_parentTrees
Map of file LFNs to trees.
unsigned int m_discardErrorMask
Don't issue a warning when discarding events if the error flag consists exclusively of flags in this ...
bool m_isSecondaryInput
When using a second RootInputModule in an independent path [usually if you are using add_independent_...
bool m_collectStatistics
Collect statistics on amount of data read and print statistics (separate for input & parent files) af...
long m_lastPersistentEntry
Last entry that was read from the persistent tree.
StoreEntries m_storeEntries
Vector of DataStore entries of event durability that we are supposed to read in.
virtual void initialize() override
Initialize the Module.
TChain * m_tree
TTree for event input.
virtual void event() override
Running over all events.
int m_parentLevel
Level of parent files to be read.
std::vector< StoreEntries > m_parentStoreEntries
The parent DataStore entries per level.
virtual void terminate() override
Is called at the end of your Module.
std::string m_inputFileName
File to read from.
TChain * m_persistent
TTree for persistent input.
std::string m_lastParentFileLFN
last parent file LFN seen.
std::vector< std::string > m_inputFileNames
Files to read from.
void readPersistentEntry(long fileEntry)
Loads given entry from persistent tree.
virtual ~RootInputModule()
Destructor.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_cacheSize
Input ROOT File Cache size in MB, <0 means default.
long m_nextEntry
Next entry to be read in event tree.
std::set< std::string > m_connectedBranches[DataStore::c_NDurabilityTypes]
Already connected branches.
void entryNotFound(const std::string &entryOrigin, const std::string &name, bool fileChanged=true)
Check if we warn the user or abort after an entry was missing after changing files.
virtual std::vector< std::string > getFileNames(bool outputFiles=false) override
Get list of input files, taking -i command line overrides into account.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
bool createParentStoreEntries()
Connect the parent trees and fill m_parentStoreEntries.
void addEventListForIndexFile(const std::string &parentLfn)
For index files, this creates TEventList/TEntryListArray to enable better cache use.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
bool m_processingAllEvents
Set to true if we process the input files completely: No skip events or sequences or -n parameters.
std::vector< int > m_skipToEvent
experiment, run, event number of first event to load
ReadStats m_readStats
some statistics for all files read so far.
unsigned int m_skipNEvents
Can be set from steering file to skip the first N events.
void readTree()
Actually performs the reading from the tree.
bool readParentTrees()
Read data of the current event from the parents.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches, that shall be written out.
std::vector< std::string > m_entrySequences
The number sequences (e.g.
bool connectBranches(TTree *tree, DataStore::EDurability durability, StoreEntries *storeEntries)
Connect branches of the given tree to the data store.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Definition: ScopeGuard.h:76
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
return entry number with given (event, run, experiment) from tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
void add(const ReadStats &b)
add other stats object.
void addFromFile(const TFile *f)
add current statistics from TFile object.
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:22
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:48