Belle II Software  light-2303-iriomote
RootInputModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 
10 #include <framework/modules/rootio/RootInputModule.h>
11 
12 #include <framework/io/RootIOUtilities.h>
13 #include <framework/io/RootFileInfo.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/InputController.h>
16 #include <framework/pcore/Mergeable.h>
17 #include <framework/datastore/StoreObjPtr.h>
18 #include <framework/datastore/DataStore.h>
19 #include <framework/datastore/DependencyMap.h>
20 #include <framework/dataobjects/EventMetaData.h>
21 #include <framework/utilities/NumberSequence.h>
22 #include <framework/utilities/ScopeGuard.h>
23 #include <framework/database/Configuration.h>
24 
25 #include <TClonesArray.h>
26 #include <TEventList.h>
27 #include <TObjArray.h>
28 #include <TChainElement.h>
29 #include <TError.h>
30 
31 #include <iomanip>
32 
33 using namespace std;
34 using namespace Belle2;
35 using namespace RootIOUtilities;
36 
37 REG_MODULE(RootInput);
38 
39 RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
40 {
41  //Set module properties
42  setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
44 
45  //Parameter definition
46  vector<string> emptyvector;
47  addParam("inputFileName", m_inputFileName,
48  "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
49  string(""));
50  addParam("inputFileNames", m_inputFileNames,
51  "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
52  emptyvector);
53  addParam("entrySequences", m_entrySequences,
54  "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55  "Must be specified exactly once for each file to be opened."
56  "The first event has the entry number 0.", emptyvector);
57  addParam("ignoreCommandLineOverride", m_ignoreCommandLineOverride,
58  "Ignore override of file name via command line argument -i.", false);
59 
60  addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
61  addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
62  "the specified (experiment, run, event number) occurs. This parameter "
63  "is useful for debugging to start with a specific event.", m_skipToEvent);
64 
66  "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
68  "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
69 
71  "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
72  vector<string> excludePersistent({"ProcessStatistics"});
74  "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
75 
76  addParam("parentLevel", m_parentLevel,
77  "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
78  0);
79 
80  addParam("collectStatistics", m_collectStatistics,
81  "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
82  false);
83  addParam("cacheSize", m_cacheSize,
84  "file cache size in Mbytes. If negative, use root default", 0);
85 
86  addParam("discardErrorEvents", m_discardErrorEvents,
87  "Discard events with an error flag != 0", m_discardErrorEvents);
88  addParam("silentErrrorDiscardMask", m_discardErrorMask,
89  "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90  "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
91  "present in this mask", m_discardErrorMask);
92 
93  addParam("isSecondaryInput", m_isSecondaryInput,
94  "When using a second RootInputModule in an independent path [usually if you are using add_independent_merge_path(...)] "
95  "this has to be set to true",
96  false);
97 }
98 
100 
102 {
103  unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
104  if (skipNEventsOverride != 0)
105  m_skipNEvents = skipNEventsOverride;
106 
107  auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
108  if (entrySequencesOverride.size() > 0)
109  m_entrySequences = entrySequencesOverride;
110 
113  m_lastParentFileLFN = "";
114 
115  const vector<string>& inputFiles = getFileNames();
116  if (inputFiles.empty()) {
117  B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
118  }
119  if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
120  B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
121  }
123  if (m_inputFileNames.empty()) {
124  B2FATAL("No valid files specified!");
125  }
126 
127  if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
128  B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
129  << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
130  }
131 
132  m_inputFileName = "";
133  // we'll only use m_inputFileNames from now on
134 
135  // so let's create the chain objects ...
136  m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
137  m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
138 
139  // time for sanity checks. The problem is that this needs to read a few bytes
140  // from every input file so for jobs with large amount of input files this
141  // will not be efficient.
142  // TODO: We might want to create a different input module which will not
143  // check anything and require manual input like the number of events in
144  // each file and the global tags to use. That would be more efficient
145  // but also less safe
146 
147  // list of required branches. We keep this empty for now and only fill
148  // it after we checked the first file to make sure all other files have the
149  // same branches available.
150  std::set<std::string> requiredEventBranches;
151  std::set<std::string> requiredPersistentBranches;
152  // Event metadata from all files, keep it around for sanity checks and globaltag replay
153  std::vector<FileMetaData> fileMetaData;
154  // and if so, what is the sum
155  std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
156 
157  // scope for local variables
158  {
159  // temporarily disable some root warnings
160  auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
161  // do all files have a consistent number of MC events? that is all positive or all zero
162  bool validInputMCEvents{true};
163  for (const string& fileName : m_inputFileNames) {
164  // read metadata and create sum of MCEvents and global tags
165  try {
166  RootIOUtilities::RootFileInfo fileInfo(fileName);
167  FileMetaData meta = fileInfo.getFileMetaData();
168  if (meta.getNEvents() == 0) {
169  B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
170  continue;
171  }
172  realDataWorkaround(meta);
173  fileMetaData.push_back(meta);
174  // make sure we only look at data or MC. For the first file this is trivially true
175  if (fileMetaData.front().isMC() != meta.isMC()) {
176  throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
177  }
178  // accumulate number of inputMCEvents now
179  if (validInputMCEvents) {
180  // make sure that all files have either a non-zero or zero mcevents.
181  if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
182  B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
183  validInputMCEvents = false;
184  }
185  // So accumulate the number of MCEvents but let's be careful to not have an overflow here
186  if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
187  B2FATAL("Number of MC events is too large and cannot be represented anymore");
188  }
189  }
190  // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
191  if (requiredEventBranches.empty()) {
192  // make sure we have event meta data
193  fileInfo.checkMissingBranches({"EventMetaData"}, false);
194  requiredEventBranches = fileInfo.getBranchNames(false);
195  // filter the branches depending on what the user selected. Note we
196  // do the same thing again in connectBranches but we leave it like
197  // that because we also want to read branches from parent files
198  // selectively and thus we need to filter the branches there anyway.
199  // Here we just do it to ensure all files we read directly (which is
200  // 99% of the use case) contain all the branches we want.
201  requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
203  // but make sure we always have EventMetaData ...
204  requiredEventBranches.emplace("EventMetaData");
205 
206  // Same for persistent data ...
207  requiredPersistentBranches = fileInfo.getBranchNames(true);
208  // filter the branches depending on what the user selected
209  requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
211  } else {
212  // ok we already have the list ... so let's make sure following files have the same branches
213  fileInfo.checkMissingBranches(requiredEventBranches, false);
214  fileInfo.checkMissingBranches(requiredPersistentBranches, true);
215  }
216  // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
217  if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
218  throw std::runtime_error("Could not add file to TChain");
219  }
220  B2INFO("Added file " + fileName);
221  } catch (std::exception& e) {
222  B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
223  }
224  }
225  }
226 
227  if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
228  // Set cache size TODO: find out if files are remote and use a bigger default
229  // value if at least one file is non-local
230  if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
231 
232  // Check if the files we added to the Chain are unique,
233  // if the same file is added multiple times the TEventList used for the eventSequence feature
234  // will process each file only once with the union of both given sequences.
235  // It is not clear if the user wants this, so we raise a fatal in this situation.
236  {
237  std::set<std::string> unique_filenames;
238 
239  // The following lines are directly from the ROOT documentation
240  // see TChain::AddFile
241  TObjArray* fileElements = m_tree->GetListOfFiles();
242  TIter next(fileElements);
243  TChainElement* chEl = nullptr;
244  while ((chEl = (TChainElement*)next())) {
245  if (!unique_filenames.insert(chEl->GetTitle()).second) {
246  B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
247  // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
248  m_processingAllEvents = false;
249  }
250  }
251  if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
252  B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
253  "Please specify each file only once if you're using the sequence feature!");
254  }
255  }
256 
257  if (m_entrySequences.size() > 0) {
258  auto* elist = new TEventList("input_event_list");
259  for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
260  int64_t offset = m_tree->GetTreeOffset()[iFile];
261  int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
262  // check if Sequence consists only of ':', e.g. the whole file is requested
263  if (m_entrySequences[iFile] == ":") {
264  for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
265  elist->Enter(global_entry);
266  } else {
267  for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
268  int64_t global_entry = entry + offset;
269  if (global_entry >= next_offset) {
270  B2WARNING("Given sequence contains entry numbers which are out of range. "
271  "I won't add any further events to the EventList for the current file.");
272  break;
273  } else {
274  elist->Enter(global_entry);
275  }
276  }
277  }
278  }
279  m_tree->SetEventList(elist);
280  }
281 
282  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
283  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
284 
287 
289  delete m_tree;
290  m_tree = nullptr; //don't try to read from there
291  } else {
294  }
295 
296  if (m_parentLevel > 0) {
298  } else if (m_parentLevel < 0) {
299  B2ERROR("parentLevel must be >= 0!");
300  return;
301  }
302 
303  // Let's check if we process everything
304  // * all filenames unique (already done above)
305  // * no event skipping either with skipN, entry sequences or skipToEvent
306  // * no -n or process(path, N) with N <= the number of entries in our files
307  unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
309  m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries(m_isSecondaryInput));
310 
311  if (!m_skipToEvent.empty()) {
312  // Skipping to some specific event is also not processing all events ...
313  m_processingAllEvents = false;
314  // make sure the number of entries is exactly 3
315  if (m_skipToEvent.size() != 3) {
316  B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
317  // ignore the value
318  m_skipToEvent.clear();
319  } else {
321  }
322  if (m_nextEntry > 0) {
323  B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
324  "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
325  //force the number of skipped events to be zero
326  m_nextEntry = 0;
327  }
328  }
329 
330  // Tell the InputController which event will be processed first
331  // (important if we want to do event mixing and skip some events in the input)
332  if (m_nextEntry > 0) {
334  }
335 
336  // Processing everything so forward number of MC events
337  if (m_processingAllEvents) {
338  Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
339  }
340  // And setup global tag replay ...
342 }
343 
344 
346 {
347  if (!m_tree)
348  return;
349 
350  while (true) {
351  const long nextEntry = InputController::getNextEntry(m_isSecondaryInput);
352  if (nextEntry >= 0 && nextEntry < InputController::numEntries(m_isSecondaryInput)) {
353  // don't show this message if we are doing event merging, as it will pop up twice for every event
355  B2INFO("RootInput: will read entry " << nextEntry << " next.");
356  }
357  m_nextEntry = nextEntry;
359  && InputController::getNextEvent() >= 0) {
362  if (entry >= 0) {
363  const long chainentry = m_tree->GetChainEntryNumber(entry);
364  B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
365  m_nextEntry = chainentry;
366  } else {
367  B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
368  InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
369  }
370  }
372 
373  readTree();
374  m_nextEntry++;
375 
376  // check for events with errors
377  unsigned int errorFlag = 0;
378  if (m_discardErrorEvents && (m_nextEntry >= 0)) {
379  const StoreObjPtr<EventMetaData> eventMetaData;
380  errorFlag = eventMetaData->getErrorFlag();
381  if (errorFlag != 0) {
382  if (errorFlag & ~m_discardErrorMask) {
383  B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
384  << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
385  }
386  // make sure this event is not used if it's the last one in the file
387  eventMetaData->setEndOfData();
388  }
389  }
390  if (errorFlag == 0) break;
391  }
392 }
393 
394 
396 {
     // NOTE(review): the function header is not part of this listing (the line
     // before this brace is absent); presumably this is the module's terminate
     // hook -- confirm against the header file.
     //
     // Purpose: record the final read statistics, delete the two chains and the
     // files of all parent trees, print the statistics if requested, and clear
     // the bookkeeping containers.
397  if (m_collectStatistics and m_tree) {
398  //add stats for last file
399  m_readStats.addFromFile(m_tree->GetFile());
400  }
     // NOTE(review): the two pointers are deleted but not reset to nullptr here.
401  delete m_tree;
402  delete m_persistent;
     // Statistics accumulated over the current files of all parent trees.
403  ReadStats parentReadStats;
404  for (const auto& entry : m_parentTrees) {
405  TFile* f = entry.second->GetCurrentFile();
     // NOTE(review): listing line 406 is absent here (numbers jump 405 -> 407), so
     // the call below may be guarded by a condition that is not visible.
407  parentReadStats.addFromFile(f);
408 
     // Deleting the file; the tree pointers stored in m_parentTrees are dropped
     // by the clear() at the end of this function.
409  delete f;
410  }
411 
412  if (m_collectStatistics) {
413  B2INFO("Statistics for event tree: " << m_readStats.getString());
414  B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
415  }
416 
     // Forget which branches were connected and all cached store entry pointers.
417  for (auto& branch : m_connectedBranches) {
418  branch.clear();
419  }
420  m_storeEntries.clear();
421  m_persistentStoreEntries.clear();
422  m_parentStoreEntries.clear();
423  m_parentTrees.clear();
424 }
425 
426 
428 {
     // NOTE(review): the function header is not part of this listing; this body
     // is presumably the readTree() called from the event loop -- confirm.
     //
     // Purpose: load the chain entry selected by m_nextEntry into the connected
     // store entries, reload the persistent data when the chain moved on to a
     // different tree, read the parent files if requested and finally invalidate
     // entries flagged with kInvalidObject. Does nothing when m_tree is null.
429  if (!m_tree)
430  return;
431 
432  //keep snapshot of TFile stats (to use if it changes)
433  ReadStats currentEventStats;
434  if (m_collectStatistics) {
435  currentEventStats.addFromFile(m_tree->GetFile());
436  }
437 
438  // Check if there are still new entries available.
     // NOTE(review): m_nextEntry is stored into an int here; elsewhere it is
     // assigned from a long -- confirm that no narrowing can occur.
439  int localEntryNumber = m_nextEntry;
     // With entry sequences the value is first passed through GetEntryNumber();
     // presumably this maps the position in the event list to a chain entry -- confirm.
440  if (m_entrySequences.size() > 0) {
441  localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
442  }
443  localEntryNumber = m_tree->LoadTree(localEntryNumber);
444 
     // -2 is treated as "no more entries"; it is written to m_nextEntry so the
     // caller can see it. Any other negative value is fatal.
445  if (localEntryNumber == -2) {
446  m_nextEntry = -2;
447  return; //end of file
448  } else if (localEntryNumber < 0) {
449  B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. TChain::LoadTree() returned" <<
450  LogVar("error", localEntryNumber));
451  }
452  B2DEBUG(39, "Reading file entry " << m_nextEntry);
453 
454  //Make sure transient members of objects are reinitialised
455  for (auto entry : m_storeEntries) {
456  entry->resetForGetEntry();
457  }
458  for (const auto& storeEntries : m_parentStoreEntries) {
459  for (auto entry : storeEntries) {
460  entry->resetForGetEntry();
461  }
462  }
463 
     // Read from the currently loaded tree using the value returned by LoadTree().
464  int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
465  if (bytesRead <= 0) {
466  B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
467  }
468 
469  //In case someone is tempted to change this:
470  // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
471  // calling GetName() on the TFile almost works as expected, but starts with the
472  // last file in a TChain. (-> we re-read the first persistent tree with TChain,
473  // with ill results for Mergeable objects.)
474  // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
475  // so we'll save the last persistent tree loaded and only reload on changes.
     // NOTE(review): listing line 476 is absent here; `fileMetaData` used below is
     // presumably declared on that line -- confirm.
477  const long treeNum = m_tree->GetTreeNumber();
478  const bool fileChanged = (m_lastPersistentEntry != treeNum);
479  if (fileChanged) {
     // Only the snapshot taken at the top of this function is added when the tree
     // number changed.
480  if (m_collectStatistics) {
481  m_readStats.add(currentEventStats);
482  }
483  // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
484  readPersistentEntry(treeNum);
485  B2INFO("Loading new input file"
486  << LogVar("filename", m_tree->GetFile()->GetName())
487  << LogVar("metadata LFN", fileMetaData->getLfn()));
488  }
     // Applied on every call, not only when the file changed.
489  realDataWorkaround(*fileMetaData);
490 
     // Publish what was read: ptr mirrors object, or is cleared when the branch
     // did not deliver an object for this entry.
491  for (auto entry : m_storeEntries) {
492  if (!entry->object) {
493  entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
494  entry->recoverFromNullObject();
495  entry->ptr = nullptr;
496  } else {
497  entry->ptr = entry->object;
498  }
499  }
500 
501  if (m_parentLevel > 0) {
502  if (!readParentTrees())
503  B2FATAL("Could not read data from parent file!");
504  }
505 
506  // Nooow, if the object didn't exist in the event when we wrote it to File we still have it in the file but it's marked as invalid Object.
507  // So go through everything and check for the bit and invalidate as necessary
     // NOTE(review): entry->object is dereferenced unconditionally below; this
     // presumably relies on recoverFromNullObject() having restored it -- confirm.
508  for (auto entry : m_storeEntries) {
509  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
510  }
511  for (const auto& storeEntries : m_parentStoreEntries) {
512  for (auto entry : storeEntries) {
513  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
514  }
515  }
516 }
517 
/**
 * Connect the branches of `tree` to DataStore entries of the given durability.
 *
 * Every branch is first disabled via setBranchStatus(). Branches that survive
 * filterBranches() (using m_branchNames / m_excludeBranchNames for this
 * durability) are re-enabled, registered in the DataStore with c_WriteOut, and
 * their branch address is pointed at the `object` member of the store entry.
 * "FileMetaData" is connected regardless of the filter when `tree` is
 * m_persistent, and "EventMetaData" when `tree` is m_tree. Branches already
 * listed in m_connectedBranches[durability] are skipped.
 *
 * @param tree          tree whose branches are inspected
 * @param durability    durability used for filtering and registration
 * @param storeEntries  if non-null, receives a pointer to each newly connected StoreEntry
 * @return always true; failures are reported through B2FATAL
 */
518 bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
519 {
     // NOTE(review): this message is emitted on every call, whatever the durability.
520  B2DEBUG(30, "File changed, loading persistent data.");
     // NOTE(review): listing line 521 is absent here; `map` used further down is
     // presumably declared on that line as the StoreEntryMap for `durability` -- confirm.
522  //Go over the branchlist and connect the branches with DataStore entries
523  const TObjArray* branchesObjArray = tree->GetListOfBranches();
524  if (!branchesObjArray) {
525  B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
526  }
     // First pass: collect branch pointers and names, skipping null slots.
527  std::vector<TBranch*> branches;
528  set<string> branchList;
529  for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
530  auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
531  if (!branch) continue;
532  branchList.insert(branch->GetName());
533  branches.emplace_back(branch);
534  // start with all branches disabled and only enable what we read
535  setBranchStatus(branch, false);
536  }
537  //skip branches the user doesn't want
538  branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
     // Second pass: enable and connect the selected branches.
539  for (TBranch* branch : branches) {
540  const std::string branchName = branch->GetName();
541  //skip already connected branches
542  if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
543  continue;
544 
     // Skip a filtered-out branch unless it is FileMetaData of the persistent
     // chain or EventMetaData of the event chain.
545  if ((branchList.count(branchName) == 0) and
546  ((branchName != "FileMetaData") || (tree != m_persistent)) and
547  ((branchName != "EventMetaData") || (tree != m_tree))) {
548  continue;
549  }
550  auto found = setBranchStatus(branch, true);
551  B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
552  << LogVar("children found", found));
553 
554  //Get information about the object in the branch
     // An entry is read into a temporary object only to find out its class; the
     // temporary is deleted right afterwards.
     // NOTE(review): objectPtr is dereferenced without a null check.
555  TObject* objectPtr = nullptr;
556  branch->SetAddress(&objectPtr);
557  branch->GetEntry();
558  bool array = (string(branch->GetClassName()) == "TClonesArray");
559  TClass* objClass = nullptr;
560  if (array)
561  objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
562  else
563  objClass = objectPtr->IsA();
564  delete objectPtr;
565 
566  //Create a DataStore entry and connect the branch address to it
567  if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
568  B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
     // Only reached if B2FATAL returns.
569  continue;
570  }
     // NOTE(review): the result of find() is dereferenced directly; this relies
     // on registerEntry() having created the entry in `map`.
571  DataStore::StoreEntry& entry = (map.find(branchName))->second;
572  tree->SetBranchAddress(branch->GetName(), &(entry.object));
573  if (storeEntries) storeEntries->push_back(&entry);
574 
575  //Keep track of already connected branches
576  m_connectedBranches[durability].insert(branchName);
577  }
578 
579  return true;
580 }
581 
582 
584 {
     // NOTE(review): the function header is not part of this listing; the name
     // and signature must be taken from the header file. The body returns bool.
     //
     // Purpose: starting from the first event of m_tree, walk up m_parentLevel
     // generations of parent files, open each one and connect its persistent
     // branches. Returns false as soon as a file or tree cannot be opened.
585  // get the experiment/run/event number and parentLfn of the first entry
     // The EventMetaData branch is temporarily pointed at a local pointer and the
     // previous address is restored once the values have been copied.
586  TBranch* branch = m_tree->GetBranch("EventMetaData");
587  char* address = branch->GetAddress();
588  EventMetaData* eventMetaData = nullptr;
589  branch->SetAddress(&eventMetaData);
590  branch->GetEntry(0);
591  int experiment = eventMetaData->getExperiment();
592  int run = eventMetaData->getRun();
593  unsigned int event = eventMetaData->getEvent();
594  std::string parentLfn = eventMetaData->getParentLfn();
595  branch->SetAddress(address);
596 
597  // loop over parents and get their metadata
598  for (int level = 0; level < m_parentLevel; level++) {
599  // open the parent file
     // The directory that was current before opening is made current again.
600  TDirectory* dir = gDirectory;
601  const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
602  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
603  dir->cd();
604  if (!file || !file->IsOpen()) {
605  B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
606  << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
607  return false;
608  }
609 
610  // get the event tree and connect its branches
     // NOTE(review): the early returns below do not close `file`.
611  auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
612  if (!tree) {
613  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
614  return false;
615  }
     // Make sure there is one list of store entries per parent level.
616  if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
     // NOTE(review): listing line 617 is absent here; the connection of the event
     // branches announced in the comment above presumably happens on that line -- confirm.
618  m_parentTrees.insert(std::make_pair(parentLfn, tree));
619 
620  // get the persistent tree and read its branches
621  auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
622  if (!persistent) {
623  B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
624  return false;
625  }
     // No list is passed, so the connected persistent entries are not collected.
626  connectBranches(persistent, DataStore::c_Persistent, nullptr);
627 
628  // get parent LFN of parent
     // NOTE(review): neither `entry` nor `metaData` is checked before use.
629  EventMetaData* metaData = nullptr;
630  tree->SetBranchAddress("EventMetaData", &metaData);
631  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
632  tree->GetBranch("EventMetaData")->GetEntry(entry);
633  parentLfn = metaData->getParentLfn();
634  }
635 
636  return true;
637 }
638 
639 
641 {
     // NOTE(review): the function header is not part of this listing; this body is
     // presumably the readParentTrees() whose bool result is checked when reading
     // an event -- confirm.
     //
     // Purpose: for the event currently held in the EventMetaData store object,
     // read the matching entry from each of the m_parentLevel generations of
     // parent files. Returns false if a file, tree or entry cannot be found.
642  const StoreObjPtr<EventMetaData> eventMetaData;
643  int experiment = eventMetaData->getExperiment();
644  int run = eventMetaData->getRun();
645  unsigned int event = eventMetaData->getEvent();
646 
647  std::string parentLfn = eventMetaData->getParentLfn();
648  for (int level = 0; level < m_parentLevel; level++) {
649  const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
650 
651  // Open the parent file if we haven't done this already
     // Opened trees are cached in m_parentTrees, keyed by the parent LFN.
652  TTree* tree = nullptr;
653  if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
654  TDirectory* dir = gDirectory;
655  B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
656  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
657  dir->cd();
658  if (!file || !file->IsOpen()) {
659  B2ERROR("Couldn't open parent file " << parentPfn);
660  return false;
661  }
662  tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
663  if (!tree) {
664  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
665  return false;
666  }
     // Point the branches of the newly opened tree at the store entries recorded
     // for this parent level.
667  for (auto entry : m_parentStoreEntries[level]) {
668  tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
669  }
670  m_parentTrees.insert(std::make_pair(parentLfn, tree));
671  } else {
672  tree = m_parentTrees[parentLfn];
673  }
674 
675  // get entry number in parent tree
676  long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
677  if (entryNumber < 0) {
678  B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
679  return false;
680  }
681 
682  // read the tree and mark the data read in the data store
     // NOTE(review): the EventMetaData branch is pointed at a local variable on
     // every call; no release of the object read into it is visible here.
683  EventMetaData* parentMetaData = nullptr;
684  tree->SetBranchAddress("EventMetaData", &parentMetaData);
685  tree->GetEntry(entryNumber);
686  for (auto entry : m_parentStoreEntries[level]) {
687  entry->ptr = entry->object;
688  }
689 
690  // set the parent LFN to the next level
691  parentLfn = parentMetaData->getParentLfn();
692  }
693 
     // NOTE(review): at this point parentLfn holds the value assigned at the end
     // of the last loop iteration, i.e. the parent LFN stored in the last file
     // read, not the LFN of that file -- confirm this is the intended argument.
694  addEventListForIndexFile(parentLfn);
695 
696  return true;
697 }
698 
699 void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
700 {
701  //is this really an index file? (=only EventMetaData stored)
702  if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
703  return;
704  //did we handle the current parent file already?
705  if (parentLfn == m_lastParentFileLFN)
706  return;
707  m_lastParentFileLFN = parentLfn;
708 
709  B2INFO("Index file detected, scanning to generate event list.");
710  TTree* tree = m_parentTrees.at(parentLfn);
711 
712  //both types of list work, TEventList seems to result in slightly less data being read.
713  auto* elist = new TEventList("parent_entrylist");
714  //TEntryListArray* elist = new TEntryListArray();
715 
716  TBranch* branch = m_tree->GetBranch("EventMetaData");
717  auto* address = branch->GetAddress();
718  EventMetaData* eventMetaData = nullptr;
719  branch->SetAddress(&eventMetaData);
720  long nEntries = m_tree->GetEntries();
721  for (long i = m_nextEntry; i < nEntries; i++) {
722  branch->GetEntry(i);
723  int experiment = eventMetaData->getExperiment();
724  int run = eventMetaData->getRun();
725  unsigned int event = eventMetaData->getEvent();
726  const std::string& newParentLfn = eventMetaData->getParentLfn();
727 
728  if (parentLfn != newParentLfn) {
729  //parent file changed, stopping for now
730  break;
731  }
732  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
733  elist->Enter(entry);
734  }
735  branch->SetAddress(address);
736 
737  if (tree) {
738  tree->SetEventList(elist);
739  //tree->SetEntryList(elist);
740  }
741 }
742 
743 void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
744 {
745  if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
746  B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
747  } else if (fileChanged) {
748  B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
749  " that was present in a previous entry.");
750  }
751 }
752 
754 {
     // NOTE(review): the function header is not part of this listing; this body is
     // presumably the readPersistentEntry() called with the tree number when the
     // input file changes -- confirm.
     //
     // Purpose: read entry `fileEntry` of the persistent chain into the persistent
     // store entries; objects inheriting from Mergeable are merged with the
     // version that was loaded before.
755  m_lastPersistentEntry = fileEntry;
756 
     // Before reading: keep a clone of every Mergeable object in entry->ptr so it
     // can be merged into the newly read one.
     // NOTE(review): entry->object is dereferenced without a null check in this loop.
757  for (auto entry : m_persistentStoreEntries) {
758  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
759  TObject* copyOfPreviousVersion = nullptr;
760  if (isMergeable) {
761  copyOfPreviousVersion = entry->object->Clone();
762  }
763  entry->resetForGetEntry();
764  //ptr stores old value (or nullptr)
765  entry->ptr = copyOfPreviousVersion;
766  }
767 
768  int bytesRead = m_persistent->GetEntry(fileEntry);
769  if (bytesRead <= 0) {
     // The file name for the message is taken from the event chain, if available.
770  const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
771  B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
772  }
773 
     // After reading: merge the saved clone into the new object, delete the clone
     // and publish the new object through entry->ptr.
774  for (auto entry : m_persistentStoreEntries) {
775  if (entry->object) {
776  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
777  if (isMergeable) {
778  const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
779  auto* newObj = static_cast<Mergeable*>(entry->object);
780  newObj->merge(oldObj);
781 
782  delete entry->ptr;
783  }
784  entry->ptr = entry->object;
785  } else {
     // Called with two arguments only, so a default for the third parameter must
     // come from the declaration (not visible here).
     // NOTE(review): entry->ptr is overwritten below without deleting a clone
     // that may have been stored in it above -- confirm this cannot leak.
786  entryNotFound("Persistent tree", entry->name);
787  entry->recoverFromNullObject();
788  entry->ptr = nullptr;
789  }
790  }
791 }
792 
794 {
     // NOTE(review): the function header is not part of this listing; this body is
     // presumably the realDataWorkaround() applied to FileMetaData objects -- confirm.
     //
     // Declares the metadata to be real data when all of the following hold: the
     // site string starts with "bfe0", the date string compares lower than
     // "2019-06-30", the lowest experiment is > 0, the highest experiment is < 9
     // and the lowest run is > 0.
     // NOTE(review): the reason for these particular values is not visible in
     // this file.
795  if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
796  (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
797  metaData.declareRealData();
798  }
799 }
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void setInputMetadata(const std::vector< FileMetaData > &inputMetadata)
To be called by input modules with the list of all input FileMetaData.
@ c_WriteOut
Object/array should be saved by output modules.
Definition: DataStore.h:70
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
Definition: DataStore.h:326
EDurability
Durability types.
Definition: DataStore.h:58
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:54
bool registerEntry(const std::string &name, EDurability durability, TClass *objClass, bool array, EStoreFlags storeFlags)
Register an entry in the DataStore map.
Definition: DataStore.cc:190
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:87
@ c_Input
required input.
Definition: DependencyMap.h:33
unsigned int getSkipEventsOverride() const
Get skipNEvents override, or 0 if unset.
Definition: Environment.h:88
std::vector< std::string > getEntrySequencesOverride() const
Returns the number sequences (e.g.
Definition: Environment.h:72
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:29
unsigned int getNumberEventsOverride() const
Returns number of events in run 1 for EventInfoSetter module, or 0 for no override.
Definition: Environment.h:66
void setNumberOfMCEvents(unsigned int n)
Set number of generated events (for EventInfoSetter).
Definition: Environment.h:97
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
int getExperiment() const
Experiment Getter.
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
Definition: FileCatalog.cc:23
virtual std::string getPhysicalFileName(const std::string &lfn)
Get the physical file name for the LFN.
Definition: FileCatalog.cc:180
Metadata information about a file.
Definition: FileMetaData.h:29
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:49
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:282
bool isMC() const
Is it generated data?.
Definition: FileMetaData.h:115
unsigned int getNEvents() const
Number of events getter.
Definition: FileMetaData.h:41
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:91
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:45
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:57
unsigned int getMcEvents() const
Number of generated events getter.
Definition: FileMetaData.h:119
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:95
static long getNextRun()
Return run number set via setNextEntry().
static void setNextEntry(long entry, bool independentPath=false)
Set the file entry to be loaded the next time event() is called.
static long getNextExperiment()
Return experiment number set via setNextEntry().
static bool getEventMerging()
Get if we are merging events from two paths.
static void eventLoaded(long entry, bool independentPath=false)
Indicate that an event (in the given entry) was loaded and reset all members related to the next entr...
static long numEntries(bool independentPath=false)
Returns total number of entries in the event tree.
static long getNextEntry(bool independentPath=false)
Return entry number set via setNextEntry().
static void setSkippedEntries(long entries, bool independentPath=false)
set the number of entries skipped by the RootInputModule.
static void setChain(const TChain *chain, bool independentPath=false)
Set the loaded TChain (event durability).
static void setCanControlInput(bool on)
Call this function from supported input modules.
static long getNextEvent()
Return event number set via setNextEntry().
Abstract base class for objects that can be merged.
Definition: Mergeable.h:31
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_Input
This module is an input module (reads data).
Definition: Module.h:78
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:27
const FileMetaData & getFileMetaData()
Return the file metadata from the file.
Definition: RootFileInfo.cc:41
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:75
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:55
bool m_discardErrorEvents
Discard events that have an error flag != 0.
StoreEntries m_persistentStoreEntries
Vector of DataStore entries of persistent durability that we are supposed to read in.
void realDataWorkaround(FileMetaData &metaData)
Correct isMC flag for raw data recorded before experiment 8 run 2364.
std::map< std::string, TTree * > m_parentTrees
Map of file LFNs to trees.
unsigned int m_discardErrorMask
Don't issue a warning when discarding events if the error flag consists exclusively of flags in this ...
bool m_isSecondaryInput
When using a second RootInputModule in an independent path [usually if you are using add_independent_...
bool m_collectStatistics
Collect statistics on amount of data read and print statistics (separate for input & parent files) af...
long m_lastPersistentEntry
last entry to be in persistent tree.
StoreEntries m_storeEntries
Vector of DataStore entries of event durability that we are supposed to read in.
virtual void initialize() override
Initialize the Module.
TChain * m_tree
TTree for event input.
virtual void event() override
Running over all events.
int m_parentLevel
Level of parent files to be read.
std::vector< StoreEntries > m_parentStoreEntries
The parent DataStore entries per level.
virtual void terminate() override
Is called at the end of your Module.
std::string m_inputFileName
File to read from.
TChain * m_persistent
TTree for persistent input.
std::string m_lastParentFileLFN
last parent file LFN seen.
std::vector< std::string > m_inputFileNames
Files to read from.
void readPersistentEntry(long fileEntry)
Loads given entry from persistent tree.
virtual ~RootInputModule()
Destructor.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_cacheSize
Input ROOT File Cache size in MB, <0 means default.
long m_nextEntry
Next entry to be read in event tree.
std::set< std::string > m_connectedBranches[DataStore::c_NDurabilityTypes]
Already connected branches.
void entryNotFound(const std::string &entryOrigin, const std::string &name, bool fileChanged=true)
Check if we warn the user or abort after an entry was missing after changing files.
virtual std::vector< std::string > getFileNames(bool outputFiles=false) override
Get list of input files, taking -i command line overrides into account.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
bool createParentStoreEntries()
Connect the parent trees and fill m_parentStoreEntries.
void addEventListForIndexFile(const std::string &parentLfn)
For index files, this creates TEventList/TEntryListArray to enable better cache use.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
bool m_processingAllEvents
Set to true if we process the input files completely: No skip events or sequences or -n parameters.
std::vector< int > m_skipToEvent
experiment, run, event number of first event to load
ReadStats m_readStats
some statistics for all files read so far.
unsigned int m_skipNEvents
Can be set from steering file to skip the first N events.
void readTree()
Actually performs the reading from the tree.
bool readParentTrees()
Read data of the current event from the parents.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches, that shall be written out.
std::vector< std::string > m_entrySequences
The number sequences (e.g.
bool connectBranches(TTree *tree, DataStore::EDurability durability, StoreEntries *storeEntries)
Connect branches of the given tree to the data store.
static ScopeGuard guardValue(T &reference)
Create a ScopeGuard for a value: The content of reference will be copied and reset when the returned ...
Definition: ScopeGuard.h:76
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
Class to store variables with their name which were sent to the logging service.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
long getEntryNumberWithEvtRunExp(TTree *tree, long event, long run, long experiment)
return entry number with given (event, run, experiment) from tree.
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:23
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
void add(const ReadStats &b)
add other stats object.
void addFromFile(const TFile *f)
add current statistics from TFile object.
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:22
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:48