Belle II Software  release-06-02-00
RootInputModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 
10 #include <framework/modules/rootio/RootInputModule.h>
11 
12 #include <framework/io/RootIOUtilities.h>
13 #include <framework/io/RootFileInfo.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/InputController.h>
16 #include <framework/pcore/Mergeable.h>
17 #include <framework/datastore/StoreObjPtr.h>
18 #include <framework/datastore/DataStore.h>
19 #include <framework/datastore/DependencyMap.h>
20 #include <framework/dataobjects/EventMetaData.h>
21 #include <framework/utilities/NumberSequence.h>
22 #include <framework/utilities/ScopeGuard.h>
23 #include <framework/database/Configuration.h>
24 
25 #include <TClonesArray.h>
26 #include <TEventList.h>
27 #include <TObjArray.h>
28 #include <TChainElement.h>
29 #include <TError.h>
30 
31 #include <iomanip>
32 
33 using namespace std;
34 using namespace Belle2;
35 using namespace RootIOUtilities;
36 
37 REG_MODULE(RootInput)
38 
39 RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
40 {
41  //Set module properties
42  setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
43  setPropertyFlags(c_Input);
44 
45  //Parameter definition
46  vector<string> emptyvector;
47  addParam("inputFileName", m_inputFileName,
48  "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
49  string(""));
50  addParam("inputFileNames", m_inputFileNames,
51  "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
52  emptyvector);
53  addParam("entrySequences", m_entrySequences,
54  "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
55  "Must be specified exactly once for each file to be opened."
56  "The first event has the entry number 0.", emptyvector);
57  addParam("ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
58  "Ignore override of file name via command line argument -i.", false);
59 
60  addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
61  addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
62  "the specified (experiment, run, event number) occurs. This parameter "
63  "is useful for debugging to start with a specific event.", m_skipToEvent);
64 
65  addParam(c_SteerBranchNames[0], m_branchNames[0],
66  "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
67  addParam(c_SteerBranchNames[1], m_branchNames[1],
68  "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
69 
70  addParam(c_SteerExcludeBranchNames[0], m_excludeBranchNames[0],
71  "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
72  vector<string> excludePersistent({"ProcessStatistics"});
73  addParam(c_SteerExcludeBranchNames[1], m_excludeBranchNames[1],
74  "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
75 
76  addParam("parentLevel", m_parentLevel,
77  "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
78  0);
79 
80  addParam("collectStatistics" , m_collectStatistics,
81  "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
82  false);
83  addParam("cacheSize", m_cacheSize,
84  "file cache size in Mbytes. If negative, use root default", 0);
85 
86  addParam("discardErrorEvents", m_discardErrorEvents,
87  "Discard events with an error flag != 0", m_discardErrorEvents);
88  addParam("silentErrrorDiscardMask", m_discardErrorMask,
89  "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
90  "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
91  "present in this mask", m_discardErrorMask);
92 }
93 
94 RootInputModule::~RootInputModule() = default;
95 
96 void RootInputModule::initialize()
97 {
98  unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
99  if (skipNEventsOverride != 0)
100  m_skipNEvents = skipNEventsOverride;
101 
102  auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
103  if (entrySequencesOverride.size() > 0)
104  m_entrySequences = entrySequencesOverride;
105 
106  m_nextEntry = m_skipNEvents;
107  m_lastPersistentEntry = -1;
108  m_lastParentFileLFN = "";
109 
110  const vector<string>& inputFiles = getFileNames();
111  if (inputFiles.empty()) {
112  B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
113  }
114  if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
115  B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
116  }
117  m_inputFileNames = expandWordExpansions(inputFiles);
118  if (m_inputFileNames.empty()) {
119  B2FATAL("No valid files specified!");
120  }
121 
122  if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
123  B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
124  << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
125  }
126 
127  m_inputFileName = "";
128  // we'll only use m_inputFileNames from now on
129 
130  // so let's create the chain objects ...
131  m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
132  m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
133 
134  // time for sanity checks. The problem is that this needs to read a few bytes
135  // from every input file so for jobs with large amount of input files this
136  // will not be efficient.
137  // TODO: We might want to create a different input module which will not
138  // check anything and require manual input like the number of events in
139  // each file and the global tags to use. That would be more efficient
140  // but also less safe
141 
142  // list of required branches. We keep this empty for now and only fill
143  // it after we checked the first file to make sure all other files have the
144  // same branches available.
145  std::set<std::string> requiredEventBranches;
146  std::set<std::string> requiredPersistentBranches;
147  // Event metadata from all files, keep it around for sanity checks and globaltag replay
148  std::vector<FileMetaData> fileMetaData;
149  // and if so, what is the sum
150  std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
151 
152  // scope for local variables
153  {
154  // temporarily disable some root warnings
155  auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
156  // do all files have a consistent number of MC events? that is all positive or all zero
157  bool validInputMCEvents{true};
158  for (const string& fileName : m_inputFileNames) {
159  // read metadata and create sum of MCEvents and global tags
160  try {
161  RootIOUtilities::RootFileInfo fileInfo(fileName);
162  FileMetaData meta = fileInfo.getFileMetaData();
163  if (meta.getNEvents() == 0) {
164  B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
165  continue;
166  }
167  realDataWorkaround(meta);
168  fileMetaData.push_back(meta);
169  // make sure we only look at data or MC. For the first file this is trivially true
170  if (fileMetaData.front().isMC() != meta.isMC()) {
171  throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
172  }
173  // accumulate number of inputMCEvents now
174  if (validInputMCEvents) {
175  // make sure that all files have either a non-zero or zero mcevents.
176  if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
177  B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
178  validInputMCEvents = false;
179  }
180  // So accumulate the number of MCEvents but let's be careful to not have an overflow here
181  if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
182  B2FATAL("Number of MC events is too large and cannot be represented anymore");
183  }
184  }
185  // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
186  if (requiredEventBranches.empty()) {
187  // make sure we have event meta data
188  fileInfo.checkMissingBranches({"EventMetaData"}, false);
189  requiredEventBranches = fileInfo.getBranchNames(false);
190  // filter the branches depending on what the user selected. Note we
191  // do the same thing again in connectBranches but we leave it like
192  // that because we also want to read branches from parent files
193  // selectively and thus we need to filter the branches there anyway.
194  // Here we just do it to ensure all files we read directly (which is
195  // 99% of the use case) contain all the branches we want.
196  requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
197  m_excludeBranchNames[DataStore::c_Event], DataStore::c_Event);
198  // but make sure we always have EventMetaData ...
199  requiredEventBranches.emplace("EventMetaData");
200 
201  // Same for persistent data ...
202  requiredPersistentBranches = fileInfo.getBranchNames(true);
203  // filter the branches depending on what the user selected
204  requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
205  m_excludeBranchNames[DataStore::c_Persistent], DataStore::c_Persistent);
206  } else {
207  // ok we already have the list ... so let's make sure following files have the same branches
208  fileInfo.checkMissingBranches(requiredEventBranches, false);
209  fileInfo.checkMissingBranches(requiredPersistentBranches, true);
210  }
211  // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
212  if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
213  throw std::runtime_error("Could not add file to TChain");
214  }
215  B2INFO("Added file " + fileName);
216  } catch (std::exception& e) {
217  B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
218  }
219  }
220  }
221 
222  if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
223  // Set cache size TODO: find out if files are remote and use a bigger default
224  // value if at least one file is non-local
225  if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
226 
227  // Check if the files we added to the Chain are unique,
228  // if the same file is added multiple times the TEventList used for the eventSequence feature
229  // will process each file only once with the union of both given sequences.
230  // It is not clear if the user wants this, so we raise a fatal in this situation.
231  {
232  std::set<std::string> unique_filenames;
233 
234  // The following lines are directly from the ROOT documentation
235  // see TChain::AddFile
236  TObjArray* fileElements = m_tree->GetListOfFiles();
237  TIter next(fileElements);
238  TChainElement* chEl = nullptr;
239  while ((chEl = (TChainElement*)next())) {
240  if (!unique_filenames.insert(chEl->GetTitle()).second) {
241  B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
242  // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
243  m_processingAllEvents = false;
244  }
245  }
246  if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
247  B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
248  "Please specify each file only once if you're using the sequence feature!");
249  }
250  }
251 
252  if (m_entrySequences.size() > 0) {
253  auto* elist = new TEventList("input_event_list");
254  for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
255  int64_t offset = m_tree->GetTreeOffset()[iFile];
256  int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
257  // check if Sequence consists only of ':', e.g. the whole file is requested
258  if (m_entrySequences[iFile] == ":") {
259  for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
260  elist->Enter(global_entry);
261  } else {
262  for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
263  int64_t global_entry = entry + offset;
264  if (global_entry >= next_offset) {
265  B2WARNING("Given sequence contains entry numbers which are out of range. "
266  "I won't add any further events to the EventList for the current file.");
267  break;
268  } else {
269  elist->Enter(global_entry);
270  }
271  }
272  }
273  }
274  m_tree->SetEventList(elist);
275  }
276 
277  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
278  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
279 
280  connectBranches(m_persistent, DataStore::c_Persistent, &m_persistentStoreEntries);
281  readPersistentEntry(0);
282 
283  if (!connectBranches(m_tree, DataStore::c_Event, &m_storeEntries)) {
284  delete m_tree;
285  m_tree = nullptr; //don't try to read from there
286  } else {
287  InputController::setCanControlInput(true);
288  InputController::setChain(m_tree);
289  }
290 
291  if (m_parentLevel > 0) {
292  createParentStoreEntries();
293  } else if (m_parentLevel < 0) {
294  B2ERROR("parentLevel must be >= 0!");
295  return;
296  }
297 
298  // Let's check check if we process everything
299  // * all filenames unique (already done above)
300  // * no event skipping either with skipN, entry sequences or skipToEvent
301  // * no -n or process(path, N) with N <= the number of entries in our files
302  unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
303  m_processingAllEvents &= m_skipNEvents == 0 && m_entrySequences.size() == 0;
304  m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries());
305 
306  if (!m_skipToEvent.empty()) {
307  // Skipping to some specific event is also not processing all events ...
308  m_processingAllEvents = false;
309  // make sure the number of entries is exactly 3
310  if (m_skipToEvent.size() != 3) {
311  B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
312  // ignore the value
313  m_skipToEvent.clear();
314  } else {
315  InputController::setNextEntry(m_skipToEvent[0], m_skipToEvent[1], m_skipToEvent[2]);
316  }
317  if (m_nextEntry > 0) {
318  B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
319  "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
320  //force the number of skipped events to be zero
321  m_nextEntry = 0;
322  }
323  }
324 
325  // Processing everything so forward number of MC events
326  if (m_processingAllEvents) {
327  Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
328  }
329  // And setup global tag replay ...
330  Conditions::Configuration::getInstance().setInputMetadata(fileMetaData);
331 }
332 
333 
334 void RootInputModule::event()
335 {
336  if (!m_tree)
337  return;
338 
339  while (true) {
340  const long nextEntry = InputController::getNextEntry();
341  if (nextEntry >= 0 && nextEntry < InputController::numEntries()) {
342  B2INFO("RootInput: will read entry " << nextEntry << " next.");
343  m_nextEntry = nextEntry;
344  } else if (InputController::getNextExperiment() >= 0 && InputController::getNextRun() >= 0
345  && InputController::getNextEvent() >= 0) {
346  const long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(m_tree->GetTree(), InputController::getNextEvent(),
347  InputController::getNextRun(), InputController::getNextExperiment());
348  if (entry >= 0) {
349  const long chainentry = m_tree->GetChainEntryNumber(entry);
350  B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
351  m_nextEntry = chainentry;
352  } else {
353  B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
354  InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
355  }
356  }
357  InputController::eventLoaded(m_nextEntry);
358 
359  readTree();
360  m_nextEntry++;
361 
362  // check for events with errors
363  unsigned int errorFlag = 0;
364  if (m_discardErrorEvents && (m_nextEntry >= 0)) {
365  const StoreObjPtr<EventMetaData> eventMetaData;
366  errorFlag = eventMetaData->getErrorFlag();
367  if (errorFlag != 0) {
368  if (errorFlag & ~m_discardErrorMask) {
369  B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
370  << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
371  }
372  // make sure this event is not used if it's the last one in the file
373  eventMetaData->setEndOfData();
374  }
375  }
376  if (errorFlag == 0) break;
377  }
378 }
379 
380 
381 void RootInputModule::terminate()
382 {
383  if (m_collectStatistics and m_tree) {
384  //add stats for last file
385  m_readStats.addFromFile(m_tree->GetFile());
386  }
387  delete m_tree;
388  delete m_persistent;
389  ReadStats parentReadStats;
390  for (const auto& entry : m_parentTrees) {
391  TFile* f = entry.second->GetCurrentFile();
392  if (m_collectStatistics)
393  parentReadStats.addFromFile(f);
394 
395  delete f;
396  }
397 
398  if (m_collectStatistics) {
399  B2INFO("Statistics for event tree: " << m_readStats.getString());
400  B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
401  }
402 
403  for (auto& branch : m_connectedBranches) {
404  branch.clear();
405  }
406  m_storeEntries.clear();
407  m_persistentStoreEntries.clear();
408  m_parentStoreEntries.clear();
409  m_parentTrees.clear();
410 }
411 
412 
413 void RootInputModule::readTree()
414 {
415  if (!m_tree)
416  return;
417 
418  //keep snapshot of TFile stats (to use if it changes)
419  ReadStats currentEventStats;
420  if (m_collectStatistics) {
421  currentEventStats.addFromFile(m_tree->GetFile());
422  }
423 
424  // Check if there are still new entries available.
425  int localEntryNumber = m_nextEntry;
426  if (m_entrySequences.size() > 0) {
427  localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
428  }
429  localEntryNumber = m_tree->LoadTree(localEntryNumber);
430 
431  if (localEntryNumber == -2) {
432  m_nextEntry = -2;
433  return; //end of file
434  } else if (localEntryNumber < 0) {
435  B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. (TChain::LoadTree() returned error " <<
436  localEntryNumber << ")");
437  }
438  B2DEBUG(39, "Reading file entry " << m_nextEntry);
439 
440  //Make sure transient members of objects are reinitialised
441  for (auto entry : m_storeEntries) {
442  entry->resetForGetEntry();
443  }
444  for (const auto& storeEntries : m_parentStoreEntries) {
445  for (auto entry : storeEntries) {
446  entry->resetForGetEntry();
447  }
448  }
449 
450  int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
451  if (bytesRead <= 0) {
452  B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
453  }
454 
455  //In case someone is tempted to change this:
456  // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
457  // calling GetName() on the TFile almost works as expected, but starts with the
458  // last file in a TChain. (-> we re-read the first persistent tree with TChain,
459  // with ill results for Mergeable objects.)
460  // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
461  // so we'll save the last persistent tree loaded and only reload on changes.
462  StoreObjPtr<FileMetaData> fileMetaData("", DataStore::c_Persistent);
463  const long treeNum = m_tree->GetTreeNumber();
464  const bool fileChanged = (m_lastPersistentEntry != treeNum);
465  if (fileChanged) {
466  if (m_collectStatistics) {
467  m_readStats.add(currentEventStats);
468  }
469  // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
470  readPersistentEntry(treeNum);
471  B2INFO("Loading new input file"
472  << LogVar("filename", m_tree->GetFile()->GetName())
473  << LogVar("metadata LFN", fileMetaData->getLfn()));
474  }
475  realDataWorkaround(*fileMetaData);
476 
477  for (auto entry : m_storeEntries) {
478  if (!entry->object) {
479  entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
480  entry->recoverFromNullObject();
481  entry->ptr = nullptr;
482  } else {
483  entry->ptr = entry->object;
484  }
485  }
486 
487  if (m_parentLevel > 0) {
488  if (!readParentTrees())
489  B2FATAL("Could not read data from parent file!");
490  }
491 
492  // Nooow, if the object didn't exist in the event when we wrote it to File we still have it in the file but it's marked as invalid Object.
493  // So go through everything and check for the bit and invalidate as necessary
494  for (auto entry : m_storeEntries) {
495  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
496  }
497  for (const auto& storeEntries : m_parentStoreEntries) {
498  for (auto entry : storeEntries) {
499  if (entry->object->TestBit(kInvalidObject)) entry->invalidate();
500  }
501  }
502 }
503 
504 bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
505 {
506  B2DEBUG(30, "File changed, loading persistent data.");
507  DataStore::StoreEntryMap& map = DataStore::Instance().getStoreEntryMap(durability);
508  //Go over the branchlist and connect the branches with DataStore entries
509  const TObjArray* branchesObjArray = tree->GetListOfBranches();
510  if (!branchesObjArray) {
511  B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
512  }
513  std::vector<TBranch*> branches;
514  set<string> branchList;
515  for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
516  auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
517  if (!branch) continue;
518  branchList.insert(branch->GetName());
519  branches.emplace_back(branch);
520  // start with all branches disabled and only enable what we read
521  setBranchStatus(branch, false);
522  }
523  //skip branches the user doesn't want
524  branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
525  for (TBranch* branch : branches) {
526  const std::string branchName = branch->GetName();
527  //skip already connected branches
528  if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
529  continue;
530 
531  if ((branchList.count(branchName) == 0) and
532  ((branchName != "FileMetaData") || (tree != m_persistent)) and
533  ((branchName != "EventMetaData") || (tree != m_tree))) {
534  continue;
535  }
536  auto found = setBranchStatus(branch, true);
537  B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
538  << LogVar("children found", found));
539 
540  //Get information about the object in the branch
541  TObject* objectPtr = nullptr;
542  branch->SetAddress(&objectPtr);
543  branch->GetEntry();
544  bool array = (string(branch->GetClassName()) == "TClonesArray");
545  TClass* objClass = nullptr;
546  if (array)
547  objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
548  else
549  objClass = objectPtr->IsA();
550  delete objectPtr;
551 
552  //Create a DataStore entry and connect the branch address to it
553  if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
554  B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
555  continue;
556  }
557  DataStore::StoreEntry& entry = (map.find(branchName))->second;
558  tree->SetBranchAddress(branch->GetName(), &(entry.object));
559  if (storeEntries) storeEntries->push_back(&entry);
560 
561  //Keep track of already connected branches
562  m_connectedBranches[durability].insert(branchName);
563  }
564 
565  return true;
566 }
567 
568 
569 bool RootInputModule::createParentStoreEntries()
570 {
571  // get the experiment/run/event number and parentLfn of the first entry
572  TBranch* branch = m_tree->GetBranch("EventMetaData");
573  char* address = branch->GetAddress();
574  EventMetaData* eventMetaData = nullptr;
575  branch->SetAddress(&eventMetaData);
576  branch->GetEntry(0);
577  int experiment = eventMetaData->getExperiment();
578  int run = eventMetaData->getRun();
579  unsigned int event = eventMetaData->getEvent();
580  std::string parentLfn = eventMetaData->getParentLfn();
581  branch->SetAddress(address);
582 
583  // loop over parents and get their metadata
584  for (int level = 0; level < m_parentLevel; level++) {
585  // open the parent file
586  TDirectory* dir = gDirectory;
587  const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
588  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
589  dir->cd();
590  if (!file || !file->IsOpen()) {
591  B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
592  << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
593  return false;
594  }
595 
596  // get the event tree and connect its branches
597  auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
598  if (!tree) {
599  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
600  return false;
601  }
602  if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
603  connectBranches(tree, DataStore::c_Event, &m_parentStoreEntries[level]);
604  m_parentTrees.insert(std::make_pair(parentLfn, tree));
605 
606  // get the persistent tree and read its branches
607  auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
608  if (!persistent) {
609  B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
610  return false;
611  }
612  connectBranches(persistent, DataStore::c_Persistent, nullptr);
613 
614  // get parent LFN of parent
615  EventMetaData* metaData = nullptr;
616  tree->SetBranchAddress("EventMetaData", &metaData);
617  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
618  tree->GetBranch("EventMetaData")->GetEntry(entry);
619  parentLfn = metaData->getParentLfn();
620  }
621 
622  return true;
623 }
624 
625 
626 bool RootInputModule::readParentTrees()
627 {
628  const StoreObjPtr<EventMetaData> eventMetaData;
629  int experiment = eventMetaData->getExperiment();
630  int run = eventMetaData->getRun();
631  unsigned int event = eventMetaData->getEvent();
632 
633  std::string parentLfn = eventMetaData->getParentLfn();
634  for (int level = 0; level < m_parentLevel; level++) {
635  const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
636 
637  // Open the parent file if we haven't done this already
638  TTree* tree = nullptr;
639  if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
640  TDirectory* dir = gDirectory;
641  B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
642  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
643  dir->cd();
644  if (!file || !file->IsOpen()) {
645  B2ERROR("Couldn't open parent file " << parentPfn);
646  return false;
647  }
648  tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
649  if (!tree) {
650  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
651  return false;
652  }
653  for (auto entry : m_parentStoreEntries[level]) {
654  tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
655  }
656  m_parentTrees.insert(std::make_pair(parentLfn, tree));
657  } else {
658  tree = m_parentTrees[parentLfn];
659  }
660 
661  // get entry number in parent tree
662  long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
663  if (entryNumber < 0) {
664  B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
665  return false;
666  }
667 
668  // read the tree and mark the data read in the data store
669  EventMetaData* parentMetaData = nullptr;
670  tree->SetBranchAddress("EventMetaData", &parentMetaData);
671  tree->GetEntry(entryNumber);
672  for (auto entry : m_parentStoreEntries[level]) {
673  entry->ptr = entry->object;
674  }
675 
676  // set the parent LFN to the next level
677  parentLfn = parentMetaData->getParentLfn();
678  }
679 
680  addEventListForIndexFile(parentLfn);
681 
682  return true;
683 }
684 
685 void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
686 {
687  //is this really an index file? (=only EventMetaData stored)
688  if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
689  return;
690  //did we handle the current parent file already?
691  if (parentLfn == m_lastParentFileLFN)
692  return;
693  m_lastParentFileLFN = parentLfn;
694 
695  B2INFO("Index file detected, scanning to generate event list.");
696  TTree* tree = m_parentTrees.at(parentLfn);
697 
698  //both types of list work, TEventList seems to result in slightly less data being read.
699  auto* elist = new TEventList("parent_entrylist");
700  //TEntryListArray* elist = new TEntryListArray();
701 
702  TBranch* branch = m_tree->GetBranch("EventMetaData");
703  auto* address = branch->GetAddress();
704  EventMetaData* eventMetaData = nullptr;
705  branch->SetAddress(&eventMetaData);
706  long nEntries = m_tree->GetEntries();
707  for (long i = m_nextEntry; i < nEntries; i++) {
708  branch->GetEntry(i);
709  int experiment = eventMetaData->getExperiment();
710  int run = eventMetaData->getRun();
711  unsigned int event = eventMetaData->getEvent();
712  const std::string& newParentLfn = eventMetaData->getParentLfn();
713 
714  if (parentLfn != newParentLfn) {
715  //parent file changed, stopping for now
716  break;
717  }
718  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
719  elist->Enter(entry);
720  }
721  branch->SetAddress(address);
722 
723  if (tree) {
724  tree->SetEventList(elist);
725  //tree->SetEntryList(elist);
726  }
727 }
728 
729 void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
730 {
731  if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
732  B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
733  } else if (fileChanged) {
734  B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
735  " that was present in a previous entry.");
736  }
737 }
738 
739 void RootInputModule::readPersistentEntry(long fileEntry)
740 {
741  m_lastPersistentEntry = fileEntry;
742 
743  for (auto entry : m_persistentStoreEntries) {
744  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
745  TObject* copyOfPreviousVersion = nullptr;
746  if (isMergeable) {
747  copyOfPreviousVersion = entry->object->Clone();
748  }
749  entry->resetForGetEntry();
750  //ptr stores old value (or nullptr)
751  entry->ptr = copyOfPreviousVersion;
752  }
753 
754  int bytesRead = m_persistent->GetEntry(fileEntry);
755  if (bytesRead <= 0) {
756  const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
757  B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
758  }
759 
760  for (auto entry : m_persistentStoreEntries) {
761  if (entry->object) {
762  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
763  if (isMergeable) {
764  const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
765  auto* newObj = static_cast<Mergeable*>(entry->object);
766  newObj->merge(oldObj);
767 
768  delete entry->ptr;
769  }
770  entry->ptr = entry->object;
771  } else {
772  entryNotFound("Persistent tree", entry->name);
773  entry->recoverFromNullObject();
774  entry->ptr = nullptr;
775  }
776  }
777 }
778 
779 void RootInputModule::realDataWorkaround(FileMetaData& metaData)
780 {
781  if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
782  (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
783  metaData.declareRealData();
784  }
785 }
EDurability
Durability types.
Definition: DataStore.h:58
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:87
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
int getExperiment() const
Experiment Getter.
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
Metadata information about a file.
Definition: FileMetaData.h:29
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:49
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:282
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:91
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:45
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:57
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:95
Abstract base class for objects that can be merged.
Definition: Mergeable.h:31
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Base class for Modules.
Definition: Module.h:72
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:26
const FileMetaData & getFileMetaData()
Return the file metadata from the file.
Definition: RootFileInfo.cc:41
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:75
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:55
Module to read TTree data from file into the data store.
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
Class to store variables with their name which were sent to the logging service.
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
@ c_Input
Input Process.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Abstract base class for different kinds of events.
for collecting statistics over multiple files.
std::string getString() const
string suitable for printing.
void addFromFile(const TFile *f)
add current statistics from TFile object.
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:22
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:48