Belle II Software  release-05-01-25
RootInputModule.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2012 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Martin Heck, Christian Pulvermacher, Thomas Kuhr *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 
12 #include <framework/modules/rootio/RootInputModule.h>
13 
14 #include <framework/io/RootIOUtilities.h>
15 #include <framework/io/RootFileInfo.h>
16 #include <framework/core/FileCatalog.h>
17 #include <framework/core/InputController.h>
18 #include <framework/pcore/Mergeable.h>
19 #include <framework/datastore/StoreObjPtr.h>
20 #include <framework/datastore/DataStore.h>
21 #include <framework/datastore/DependencyMap.h>
22 #include <framework/dataobjects/EventMetaData.h>
23 #include <framework/utilities/NumberSequence.h>
24 #include <framework/utilities/ScopeGuard.h>
25 #include <framework/database/Configuration.h>
26 
27 #include <TClonesArray.h>
28 #include <TEventList.h>
29 #include <TObjArray.h>
30 #include <TChainElement.h>
31 #include <TError.h>
32 
33 #include <iomanip>
34 
35 using namespace std;
36 using namespace Belle2;
37 using namespace RootIOUtilities;
38 
39 REG_MODULE(RootInput)
40 
41 RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
42 {
43  //Set module properties
44  setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
45  setPropertyFlags(c_Input);
46 
47  //Parameter definition
48  vector<string> emptyvector;
49  addParam("inputFileName", m_inputFileName,
50  "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
51  string(""));
52  addParam("inputFileNames", m_inputFileNames,
53  "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
54  emptyvector);
55  addParam("entrySequences", m_entrySequences,
56  "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
57  "Must be specified exactly once for each file to be opened."
58  "The first event has the entry number 0.", emptyvector);
59  addParam("ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
60  "Ignore override of file name via command line argument -i.", false);
61 
62  addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
63  addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
64  "the specified (experiment, run, event number) occurs. This parameter "
65  "is useful for debugging to start with a specific event.", m_skipToEvent);
66 
67  addParam(c_SteerBranchNames[0], m_branchNames[0],
68  "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
69  addParam(c_SteerBranchNames[1], m_branchNames[1],
70  "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
71 
72  addParam(c_SteerExcludeBranchNames[0], m_excludeBranchNames[0],
73  "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
74  vector<string> excludePersistent({"ProcessStatistics"});
75  addParam(c_SteerExcludeBranchNames[1], m_excludeBranchNames[1],
76  "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
77 
78  addParam("parentLevel", m_parentLevel,
79  "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
80  0);
81 
82  addParam("collectStatistics" , m_collectStatistics,
83  "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
84  false);
85  addParam("cacheSize", m_cacheSize,
86  "file cache size in Mbytes. If negative, use root default", 0);
87 
88  addParam("discardErrorEvents", m_discardErrorEvents,
89  "Discard events with an error flag != 0", true);
90 }
91 
92 RootInputModule::~RootInputModule() = default;
93 
94 void RootInputModule::initialize()
95 {
96  unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
97  if (skipNEventsOverride != 0)
98  m_skipNEvents = skipNEventsOverride;
99 
100  auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
101  if (entrySequencesOverride.size() > 0)
102  m_entrySequences = entrySequencesOverride;
103 
104  m_nextEntry = m_skipNEvents;
105  m_lastPersistentEntry = -1;
106  m_lastParentFileLFN = "";
107 
108  const vector<string>& inputFiles = getFileNames();
109  if (inputFiles.empty()) {
110  B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
111  }
112  if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
113  B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
114  }
115  m_inputFileNames = expandWordExpansions(inputFiles);
116  if (m_inputFileNames.empty()) {
117  B2FATAL("No valid files specified!");
118  }
119 
120  if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
121  B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
122  << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
123  }
124 
125  m_inputFileName = "";
126  // we'll only use m_inputFileNames from now on
127 
128  // so let's create the chain objects ...
129  m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
130  m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
131 
132  // time for sanity checks. The problem is that this needs to read a few bytes
133  // from every input file so for jobs with large amount of input files this
134  // will not be efficient.
135  // TODO: We might want to create a different input module which will not
136  // check anything and require manual input like the number of events in
137  // each file and the global tags to use. That would be more efficient
138  // but also less safe
139 
140  // list of required branches. We keep this empty for now and only fill
141  // it after we checked the first file to make sure all other files have the
142  // same branches available.
143  std::set<std::string> requiredEventBranches;
144  std::set<std::string> requiredPersistentBranches;
145  // Event metadata from all files, keep it around for sanity checks and globaltag replay
146  std::vector<FileMetaData> fileMetaData;
147  // and if so, what is the sum
148  std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
149 
150  // scope for local variables
151  {
152  // temporarily disable some root warnings
153  auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
154  // do all files have a consistent number of MC events? that is all positive or all zero
155  bool validInputMCEvents{true};
156  for (const string& fileName : m_inputFileNames) {
157  // read metadata and create sum of MCEvents and global tags
158  try {
159  RootIOUtilities::RootFileInfo fileInfo(fileName);
160  FileMetaData meta = fileInfo.getFileMetaData();
161  if (meta.getNEvents() == 0) {
162  B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
163  continue;
164  }
165  realDataWorkaround(meta);
166  fileMetaData.push_back(meta);
167  // make sure we only look at data or MC. For the first file this is trivially true
168  if (fileMetaData.front().isMC() != meta.isMC()) {
169  throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
170  }
171  // accumulate number of inputMCEvents now
172  if (validInputMCEvents) {
173  // make sure that all files have either a non-zero or zero mcevents.
174  if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
175  B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
176  validInputMCEvents = false;
177  }
178  // So accumulate the number of MCEvents but let's be careful to not have an overflow here
179  if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
180  B2FATAL("Number of MC events is too large and cannot be represented anymore");
181  }
182  }
183  // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
184  if (requiredEventBranches.empty()) {
185  // make sure we have event meta data
186  fileInfo.checkMissingBranches({"EventMetaData"}, false);
187  requiredEventBranches = fileInfo.getBranchNames(false);
188  // filter the branches depending on what the user selected. Note we
189  // do the same thing again in connectBranches but we leave it like
190  // that because we also want to read branches from parent files
191  // selectively and thus we need to filter the branches there anyway.
192  // Here we just do it to ensure all files we read directly (which is
193  // 99% of the use case) contain all the branches we want.
194  requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
195  m_excludeBranchNames[DataStore::c_Event], DataStore::c_Event);
196  // but make sure we always have EventMetaData ...
197  requiredEventBranches.emplace("EventMetaData");
198 
199  // Same for persistent data ...
200  requiredPersistentBranches = fileInfo.getBranchNames(true);
201  // filter the branches depending on what the user selected
202  requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
203  m_excludeBranchNames[DataStore::c_Persistent], DataStore::c_Persistent);
204  } else {
205  // ok we already have the list ... so let's make sure following files have the same branches
206  fileInfo.checkMissingBranches(requiredEventBranches, false);
207  fileInfo.checkMissingBranches(requiredPersistentBranches, true);
208  }
209  // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
210  if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
211  throw std::runtime_error("Could not add file to TChain");
212  }
213  B2INFO("Added file " + fileName);
214  } catch (std::exception& e) {
215  B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
216  }
217  }
218  }
219 
220  if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
221  // Set cache size TODO: find out if files are remote and use a bigger default
222  // value if at least one file is non-local
223  if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
224 
225  // Check if the files we added to the Chain are unique,
226  // if the same file is added multiple times the TEventList used for the eventSequence feature
227  // will process each file only once with the union of both given sequences.
228  // It is not clear if the user wants this, so we raise a fatal in this situation.
229  {
230  std::set<std::string> unique_filenames;
231 
232  // The following lines are directly from the ROOT documentation
233  // see TChain::AddFile
234  TObjArray* fileElements = m_tree->GetListOfFiles();
235  TIter next(fileElements);
236  TChainElement* chEl = nullptr;
237  while ((chEl = (TChainElement*)next())) {
238  if (!unique_filenames.insert(chEl->GetTitle()).second) {
239  B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
240  // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
241  m_processingAllEvents = false;
242  }
243  }
244  if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
245  B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
246  "Please specify each file only once if you're using the sequence feature!");
247  }
248  }
249 
250  if (m_entrySequences.size() > 0) {
251  auto* elist = new TEventList("input_event_list");
252  for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
253  int64_t offset = m_tree->GetTreeOffset()[iFile];
254  int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
255  // check if Sequence consists only of ':', e.g. the whole file is requested
256  if (m_entrySequences[iFile] == ":") {
257  for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
258  elist->Enter(global_entry);
259  } else {
260  for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
261  int64_t global_entry = entry + offset;
262  if (global_entry >= next_offset) {
263  B2WARNING("Given sequence contains entry numbers which are out of range. "
264  "I won't add any further events to the EventList for the current file.");
265  break;
266  } else {
267  elist->Enter(global_entry);
268  }
269  }
270  }
271  }
272  m_tree->SetEventList(elist);
273  }
274 
275  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
276  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
277 
278  connectBranches(m_persistent, DataStore::c_Persistent, &m_persistentStoreEntries);
279  readPersistentEntry(0);
280 
281  if (!connectBranches(m_tree, DataStore::c_Event, &m_storeEntries)) {
282  delete m_tree;
283  m_tree = nullptr; //don't try to read from there
284  } else {
285  InputController::setCanControlInput(true);
286  InputController::setChain(m_tree);
287  }
288 
289  if (m_parentLevel > 0) {
290  createParentStoreEntries();
291  } else if (m_parentLevel < 0) {
292  B2ERROR("parentLevel must be >= 0!");
293  return;
294  }
295 
296  // Let's check check if we process everything
297  // * all filenames unique (already done above)
298  // * no event skipping either with skipN, entry sequences or skipToEvent
299  // * no -n or process(path, N) with N <= the number of entries in our files
300  unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
301  m_processingAllEvents &= m_skipNEvents == 0 && m_entrySequences.size() == 0;
302  m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries());
303 
304  if (!m_skipToEvent.empty()) {
305  // Skipping to some specific event is also not processing all events ...
306  m_processingAllEvents = false;
307  // make sure the number of entries is exactly 3
308  if (m_skipToEvent.size() != 3) {
309  B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
310  // ignore the value
311  m_skipToEvent.clear();
312  } else {
313  InputController::setNextEntry(m_skipToEvent[0], m_skipToEvent[1], m_skipToEvent[2]);
314  }
315  if (m_nextEntry > 0) {
316  B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
317  "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
318  //force the number of skipped events to be zero
319  m_nextEntry = 0;
320  }
321  }
322 
323  // Processing everything so forward number of MC events
324  if (m_processingAllEvents) {
325  Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
326  }
327  // And setup global tag replay ...
328  Conditions::Configuration::getInstance().setInputMetadata(fileMetaData);
329 }
330 
331 
332 void RootInputModule::event()
333 {
334  if (!m_tree)
335  return;
336 
337  while (true) {
338  const long nextEntry = InputController::getNextEntry();
339  if (nextEntry >= 0 && nextEntry < InputController::numEntries()) {
340  B2INFO("RootInput: will read entry " << nextEntry << " next.");
341  m_nextEntry = nextEntry;
342  } else if (InputController::getNextExperiment() >= 0 && InputController::getNextRun() >= 0
343  && InputController::getNextEvent() >= 0) {
344  const long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(m_tree->GetTree(), InputController::getNextEvent(),
345  InputController::getNextRun(), InputController::getNextExperiment());
346  if (entry >= 0) {
347  const long chainentry = m_tree->GetChainEntryNumber(entry);
348  B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
349  m_nextEntry = chainentry;
350  } else {
351  B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
352  InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
353  }
354  }
355  InputController::eventLoaded(m_nextEntry);
356 
357  readTree();
358  m_nextEntry++;
359 
360  // check for events with errors
361  unsigned int errorFlag = 0;
362  if (m_discardErrorEvents && (m_nextEntry >= 0)) {
363  const StoreObjPtr<EventMetaData> eventMetaData;
364  errorFlag = eventMetaData->getErrorFlag();
365  if (errorFlag != 0) {
366  B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
367  << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
368  }
369  }
370  if (errorFlag == 0) break;
371  }
372 }
373 
374 
375 void RootInputModule::terminate()
376 {
377  if (m_collectStatistics and m_tree) {
378  //add stats for last file
379  m_readStats.addFromFile(m_tree->GetFile());
380  }
381  delete m_tree;
382  delete m_persistent;
383  ReadStats parentReadStats;
384  for (const auto& entry : m_parentTrees) {
385  TFile* f = entry.second->GetCurrentFile();
386  if (m_collectStatistics)
387  parentReadStats.addFromFile(f);
388 
389  delete f;
390  }
391 
392  if (m_collectStatistics) {
393  B2INFO("Statistics for event tree: " << m_readStats.getString());
394  B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
395  }
396 
397  for (auto& branch : m_connectedBranches) {
398  branch.clear();
399  }
400  m_storeEntries.clear();
401  m_persistentStoreEntries.clear();
402  m_parentStoreEntries.clear();
403  m_parentTrees.clear();
404 }
405 
406 
407 void RootInputModule::readTree()
408 {
409  if (!m_tree)
410  return;
411 
412  //keep snapshot of TFile stats (to use if it changes)
413  ReadStats currentEventStats;
414  if (m_collectStatistics) {
415  currentEventStats.addFromFile(m_tree->GetFile());
416  }
417 
418  // Check if there are still new entries available.
419  int localEntryNumber = m_nextEntry;
420  if (m_entrySequences.size() > 0) {
421  localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
422  }
423  localEntryNumber = m_tree->LoadTree(localEntryNumber);
424 
425  if (localEntryNumber == -2) {
426  m_nextEntry = -2;
427  return; //end of file
428  } else if (localEntryNumber < 0) {
429  B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. (TChain::LoadTree() returned error " <<
430  localEntryNumber << ")");
431  }
432  B2DEBUG(39, "Reading file entry " << m_nextEntry);
433 
434  //Make sure transient members of objects are reinitialised
435  for (auto entry : m_storeEntries) {
436  entry->resetForGetEntry();
437  }
438  for (const auto& storeEntries : m_parentStoreEntries) {
439  for (auto entry : storeEntries) {
440  entry->resetForGetEntry();
441  }
442  }
443 
444  int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
445  if (bytesRead <= 0) {
446  B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
447  }
448 
449  //In case someone is tempted to change this:
450  // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
451  // calling GetName() on the TFile almost works as expected, but starts with the
452  // last file in a TChain. (-> we re-read the first persistent tree with TChain,
453  // with ill results for Mergeable objects.)
454  // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
455  // so we'll save the last persistent tree loaded and only reload on changes.
456  StoreObjPtr<FileMetaData> fileMetaData("", DataStore::c_Persistent);
457  const long treeNum = m_tree->GetTreeNumber();
458  const bool fileChanged = (m_lastPersistentEntry != treeNum);
459  if (fileChanged) {
460  if (m_collectStatistics) {
461  m_readStats.add(currentEventStats);
462  }
463  // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
464  readPersistentEntry(treeNum);
465  B2INFO("Loading new input file"
466  << LogVar("filename", m_tree->GetFile()->GetName())
467  << LogVar("metadata LFN", fileMetaData->getLfn()));
468  }
469  realDataWorkaround(*fileMetaData);
470 
471  for (auto entry : m_storeEntries) {
472  if (!entry->object) {
473  entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
474  entry->recoverFromNullObject();
475  entry->ptr = nullptr;
476  } else {
477  entry->ptr = entry->object;
478  }
479  }
480 
481  if (m_parentLevel > 0) {
482  if (!readParentTrees())
483  B2FATAL("Could not read data from parent file!");
484  }
485 
486 }
487 
488 bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
489 {
490  B2DEBUG(30, "File changed, loading persistent data.");
491  DataStore::StoreEntryMap& map = DataStore::Instance().getStoreEntryMap(durability);
492  //Go over the branchlist and connect the branches with DataStore entries
493  const TObjArray* branchesObjArray = tree->GetListOfBranches();
494  if (!branchesObjArray) {
495  B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
496  }
497  std::vector<TBranch*> branches;
498  set<string> branchList;
499  for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
500  auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
501  if (!branch) continue;
502  branchList.insert(branch->GetName());
503  branches.emplace_back(branch);
504  // start with all branches disabled and only enable what we read
505  setBranchStatus(branch, false);
506  }
507  //skip branches the user doesn't want
508  branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
509  for (TBranch* branch : branches) {
510  const std::string branchName = branch->GetName();
511  //skip already connected branches
512  if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
513  continue;
514 
515  if ((branchList.count(branchName) == 0) and
516  ((branchName != "FileMetaData") || (tree != m_persistent)) and
517  ((branchName != "EventMetaData") || (tree != m_tree))) {
518  continue;
519  }
520  auto found = setBranchStatus(branch, true);
521  B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
522  << LogVar("children found", found));
523 
524  //Get information about the object in the branch
525  TObject* objectPtr = nullptr;
526  branch->SetAddress(&objectPtr);
527  branch->GetEntry();
528  bool array = (string(branch->GetClassName()) == "TClonesArray");
529  TClass* objClass = nullptr;
530  if (array)
531  objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
532  else
533  objClass = objectPtr->IsA();
534  delete objectPtr;
535 
536  //Create a DataStore entry and connect the branch address to it
537  if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
538  B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
539  continue;
540  }
541  DataStore::StoreEntry& entry = (map.find(branchName))->second;
542  tree->SetBranchAddress(branch->GetName(), &(entry.object));
543  if (storeEntries) storeEntries->push_back(&entry);
544 
545  //Keep track of already connected branches
546  m_connectedBranches[durability].insert(branchName);
547  }
548 
549  return true;
550 }
551 
552 
553 bool RootInputModule::createParentStoreEntries()
554 {
555  // get the experiment/run/event number and parentLfn of the first entry
556  TBranch* branch = m_tree->GetBranch("EventMetaData");
557  char* address = branch->GetAddress();
558  EventMetaData* eventMetaData = nullptr;
559  branch->SetAddress(&eventMetaData);
560  branch->GetEntry(0);
561  int experiment = eventMetaData->getExperiment();
562  int run = eventMetaData->getRun();
563  unsigned int event = eventMetaData->getEvent();
564  std::string parentLfn = eventMetaData->getParentLfn();
565  branch->SetAddress(address);
566 
567  // loop over parents and get their metadata
568  for (int level = 0; level < m_parentLevel; level++) {
569  // open the parent file
570  TDirectory* dir = gDirectory;
571  const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
572  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
573  dir->cd();
574  if (!file || !file->IsOpen()) {
575  B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
576  << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
577  return false;
578  }
579 
580  // get the event tree and connect its branches
581  auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
582  if (!tree) {
583  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
584  return false;
585  }
586  if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
587  connectBranches(tree, DataStore::c_Event, &m_parentStoreEntries[level]);
588  m_parentTrees.insert(std::make_pair(parentLfn, tree));
589 
590  // get the persistent tree and read its branches
591  auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
592  if (!persistent) {
593  B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
594  return false;
595  }
596  connectBranches(persistent, DataStore::c_Persistent, nullptr);
597 
598  // get parent LFN of parent
599  EventMetaData* metaData = nullptr;
600  tree->SetBranchAddress("EventMetaData", &metaData);
601  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
602  tree->GetBranch("EventMetaData")->GetEntry(entry);
603  parentLfn = metaData->getParentLfn();
604  }
605 
606  return true;
607 }
608 
609 
610 bool RootInputModule::readParentTrees()
611 {
612  const StoreObjPtr<EventMetaData> eventMetaData;
613  int experiment = eventMetaData->getExperiment();
614  int run = eventMetaData->getRun();
615  unsigned int event = eventMetaData->getEvent();
616 
617  std::string parentLfn = eventMetaData->getParentLfn();
618  for (int level = 0; level < m_parentLevel; level++) {
619  const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
620 
621  // Open the parent file if we haven't done this already
622  TTree* tree = nullptr;
623  if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
624  TDirectory* dir = gDirectory;
625  B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
626  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
627  dir->cd();
628  if (!file || !file->IsOpen()) {
629  B2ERROR("Couldn't open parent file " << parentPfn);
630  return false;
631  }
632  tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
633  if (!tree) {
634  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
635  return false;
636  }
637  for (auto entry : m_parentStoreEntries[level]) {
638  tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
639  }
640  m_parentTrees.insert(std::make_pair(parentLfn, tree));
641  } else {
642  tree = m_parentTrees[parentLfn];
643  }
644 
645  // get entry number in parent tree
646  long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
647  if (entryNumber < 0) {
648  B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
649  return false;
650  }
651 
652  // read the tree and mark the data read in the data store
653  EventMetaData* parentMetaData = nullptr;
654  tree->SetBranchAddress("EventMetaData", &parentMetaData);
655  tree->GetEntry(entryNumber);
656  for (auto entry : m_parentStoreEntries[level]) {
657  entry->ptr = entry->object;
658  }
659 
660  // set the parent LFN to the next level
661  parentLfn = parentMetaData->getParentLfn();
662  }
663 
664  addEventListForIndexFile(parentLfn);
665 
666  return true;
667 }
668 
669 void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
670 {
671  //is this really an index file? (=only EventMetaData stored)
672  if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
673  return;
674  //did we handle the current parent file already?
675  if (parentLfn == m_lastParentFileLFN)
676  return;
677  m_lastParentFileLFN = parentLfn;
678 
679  B2INFO("Index file detected, scanning to generate event list.");
680  TTree* tree = m_parentTrees.at(parentLfn);
681 
682  //both types of list work, TEventList seems to result in slightly less data being read.
683  auto* elist = new TEventList("parent_entrylist");
684  //TEntryListArray* elist = new TEntryListArray();
685 
686  TBranch* branch = m_tree->GetBranch("EventMetaData");
687  auto* address = branch->GetAddress();
688  EventMetaData* eventMetaData = nullptr;
689  branch->SetAddress(&eventMetaData);
690  long nEntries = m_tree->GetEntries();
691  for (long i = m_nextEntry; i < nEntries; i++) {
692  branch->GetEntry(i);
693  int experiment = eventMetaData->getExperiment();
694  int run = eventMetaData->getRun();
695  unsigned int event = eventMetaData->getEvent();
696  const std::string& newParentLfn = eventMetaData->getParentLfn();
697 
698  if (parentLfn != newParentLfn) {
699  //parent file changed, stopping for now
700  break;
701  }
702  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
703  elist->Enter(entry);
704  }
705  branch->SetAddress(address);
706 
707  if (tree) {
708  tree->SetEventList(elist);
709  //tree->SetEntryList(elist);
710  }
711 }
712 
713 void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
714 {
715  if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
716  B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
717  } else if (fileChanged) {
718  B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
719  " that was present in a previous entry.");
720  }
721 }
722 
723 void RootInputModule::readPersistentEntry(long fileEntry)
724 {
725  m_lastPersistentEntry = fileEntry;
726 
727  for (auto entry : m_persistentStoreEntries) {
728  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
729  TObject* copyOfPreviousVersion = nullptr;
730  if (isMergeable) {
731  copyOfPreviousVersion = entry->object->Clone();
732  }
733  entry->resetForGetEntry();
734  //ptr stores old value (or nullptr)
735  entry->ptr = copyOfPreviousVersion;
736  }
737 
738  int bytesRead = m_persistent->GetEntry(fileEntry);
739  if (bytesRead <= 0) {
740  const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
741  B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
742  }
743 
744  for (auto entry : m_persistentStoreEntries) {
745  if (entry->object) {
746  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
747  if (isMergeable) {
748  const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
749  auto* newObj = static_cast<Mergeable*>(entry->object);
750  newObj->merge(oldObj);
751 
752  delete entry->ptr;
753  }
754  entry->ptr = entry->object;
755  } else {
756  entryNotFound("Persistent tree", entry->name);
757  entry->recoverFromNullObject();
758  entry->ptr = nullptr;
759  }
760  }
761 }
762 
763 void RootInputModule::realDataWorkaround(FileMetaData& metaData)
764 {
765  if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
766  (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
767  metaData.declareRealData();
768  }
769 }
Belle2::RootIOUtilities::RootFileInfo::checkMissingBranches
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:85
Belle2::StoreEntry::object
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:41
Belle2::RootInputModule::StoreEntries
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
Definition: RootInputModule.h:87
Belle2::FileMetaData::getSite
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:105
Belle2::EventMetaData::getEvent
unsigned int getEvent() const
Event Getter.
Definition: EventMetaData.h:155
Belle2::FileMetaData::declareRealData
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:292
Belle2::RootIOUtilities::c_SteerBranchNames
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
Definition: RootIOUtilities.cc:21
Belle2::DataStore::StoreEntryMap
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:89
REG_MODULE
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:652
Belle2::generate_number_sequence
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
Definition: NumberSequence.cc:30
Belle2::FileMetaData::getExperimentLow
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:55
Belle2::StoreEntry
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:15
Belle2::RootIOUtilities::c_SteerExcludeBranchNames
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
Definition: RootIOUtilities.cc:22
Belle2::RootIOUtilities::c_treeNames
const std::string c_treeNames[]
Names of trees.
Definition: RootIOUtilities.cc:20
Belle2::EventMetaData::getParentLfn
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
Definition: EventMetaData.h:190
Belle2::FileMetaData
Metadata information about a file.
Definition: FileMetaData.h:39
Belle2::RootInputModule::ReadStats
for collecting statistics over multiple files.
Definition: RootInputModule.h:196
Belle2::Module
Base class for Modules.
Definition: Module.h:74
Belle2::FileMetaData::getExperimentHigh
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:67
Belle2::ProcType::c_Input
@ c_Input
Input Process.
Belle2::RootIOUtilities::RootFileInfo::getFileMetaData
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
Definition: RootFileInfo.cc:51
Belle2::RootIOUtilities::RootFileInfo
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:28
Belle2::FileMetaData::getRunLow
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:59
Belle2::RootInputModule
Module to read TTree data from file into the data store.
Definition: RootInputModule.h:51
Belle2::RootIOUtilities::expandWordExpansions
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
Definition: RootIOUtilities.cc:107
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::StoreObjPtr
Type-safe access to single objects in the data store.
Definition: ParticleList.h:33
Belle2::RootIOUtilities::filterBranches
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
Definition: RootIOUtilities.cc:25
LogVar
Class to store variables with their name which were sent to the logging service.
Definition: LogVariableStream.h:24
Belle2::RootIOUtilities::RootFileInfo::getBranchNames
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:65
Belle2::EventMetaData::getExperiment
int getExperiment() const
Experiment Getter.
Definition: EventMetaData.h:174
Belle2::RootIOUtilities::setBranchStatus
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Definition: RootIOUtilities.cc:85
Belle2::EventMetaData::getRun
int getRun() const
Run Getter.
Definition: EventMetaData.h:161
Belle2::EventMetaData
Store event, run, and experiment numbers.
Definition: EventMetaData.h:43
Belle2::RootInputModule::ReadStats::addFromFile
void addFromFile(const TFile *f)
add current statistics from TFile object.
Definition: RootInputModule.h:208
Belle2::Mergeable::merge
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Belle2::FileMetaData::getDate
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:101
Belle2::RootInputModule::ReadStats::getString
std::string getString() const
string suitable for printing.
Definition: RootInputModule.h:215
Belle2::DataStore::EDurability
EDurability
Durability types.
Definition: DataStore.h:60
Belle2::Mergeable
Abstract base class for objects that can be merged.
Definition: Mergeable.h:33