Belle II Software  release-05-02-19
RootInputModule.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2012 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Martin Heck, Christian Pulvermacher, Thomas Kuhr *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 
12 #include <framework/modules/rootio/RootInputModule.h>
13 
14 #include <framework/io/RootIOUtilities.h>
15 #include <framework/io/RootFileInfo.h>
16 #include <framework/core/FileCatalog.h>
17 #include <framework/core/InputController.h>
18 #include <framework/pcore/Mergeable.h>
19 #include <framework/datastore/StoreObjPtr.h>
20 #include <framework/datastore/DataStore.h>
21 #include <framework/datastore/DependencyMap.h>
22 #include <framework/dataobjects/EventMetaData.h>
23 #include <framework/utilities/NumberSequence.h>
24 #include <framework/utilities/ScopeGuard.h>
25 #include <framework/database/Configuration.h>
26 
27 #include <TClonesArray.h>
28 #include <TEventList.h>
29 #include <TObjArray.h>
30 #include <TChainElement.h>
31 #include <TError.h>
32 
33 #include <iomanip>
34 
35 using namespace std;
36 using namespace Belle2;
37 using namespace RootIOUtilities;
38 
39 REG_MODULE(RootInput)
40 
41 RootInputModule::RootInputModule() : Module(), m_nextEntry(0), m_lastPersistentEntry(-1), m_tree(nullptr), m_persistent(nullptr)
42 {
43  //Set module properties
44  setDescription("Reads objects/arrays from one or more .root files saved by the RootOutput module and makes them available through the DataStore. Files do not necessarily have to be local, http:// and root:// (for files in xrootd) URLs are supported as well.");
45  setPropertyFlags(c_Input);
46 
47  //Parameter definition
48  vector<string> emptyvector;
49  addParam("inputFileName", m_inputFileName,
50  "Input file name. For multiple files, use inputFileNames or wildcards instead. Can be overridden using the -i argument to basf2.",
51  string(""));
52  addParam("inputFileNames", m_inputFileNames,
53  "List of input files. You may use shell-like expansions to specify multiple files, e.g. 'somePrefix_*.root' or 'file_[a,b]_[1-15].root'. Can be overridden using the -i argument to basf2.",
54  emptyvector);
55  addParam("entrySequences", m_entrySequences,
56  "The number sequences (e.g. 23:42,101) defining the entries which are processed for each inputFileName."
57  "Must be specified exactly once for each file to be opened."
58  "The first event has the entry number 0.", emptyvector);
59  addParam("ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
60  "Ignore override of file name via command line argument -i.", false);
61 
62  addParam("skipNEvents", m_skipNEvents, "Skip this number of events before starting.", 0u);
63  addParam("skipToEvent", m_skipToEvent, "Skip events until the event with "
64  "the specified (experiment, run, event number) occurs. This parameter "
65  "is useful for debugging to start with a specific event.", m_skipToEvent);
66 
67  addParam(c_SteerBranchNames[0], m_branchNames[0],
68  "Names of event durability branches to be read. Empty means all branches. (EventMetaData is always read)", emptyvector);
69  addParam(c_SteerBranchNames[1], m_branchNames[1],
70  "Names of persistent durability branches to be read. Empty means all branches. (FileMetaData is always read)", emptyvector);
71 
72  addParam(c_SteerExcludeBranchNames[0], m_excludeBranchNames[0],
73  "Names of event durability branches NOT to be read. Takes precedence over branchNames.", emptyvector);
74  vector<string> excludePersistent({"ProcessStatistics"});
75  addParam(c_SteerExcludeBranchNames[1], m_excludeBranchNames[1],
76  "Names of persistent durability branches NOT to be read. Takes precedence over branchNamesPersistent.", excludePersistent);
77 
78  addParam("parentLevel", m_parentLevel,
79  "Number of generations of parent files (files used as input when creating a file) to be read. This can be useful if a file is missing some information available in its parent. See https://confluence.desy.de/display/BI/Software+ParentFiles for details.",
80  0);
81 
82  addParam("collectStatistics" , m_collectStatistics,
83  "Collect statistics on amount of data read and print statistics (seperate for input & parent files) after processing. Data is collected from TFile using GetBytesRead(), GetBytesReadExtra(), GetReadCalls()",
84  false);
85  addParam("cacheSize", m_cacheSize,
86  "file cache size in Mbytes. If negative, use root default", 0);
87 
88  addParam("discardErrorEvents", m_discardErrorEvents,
89  "Discard events with an error flag != 0", m_discardErrorEvents);
90  addParam("silentErrrorDiscardMask", m_discardErrorMask,
91  "Bitmask of error flags to silently discard without raising a WARNING. Should be a combination of the ErrorFlags defined "
92  "in the EventMetaData. No Warning will be issued when discarding an event if the error flag consists exclusively of flags "
93  "present in this mask", m_discardErrorMask);
94 }
95 
96 RootInputModule::~RootInputModule() = default;
97 
98 void RootInputModule::initialize()
99 {
100  unsigned int skipNEventsOverride = Environment::Instance().getSkipEventsOverride();
101  if (skipNEventsOverride != 0)
102  m_skipNEvents = skipNEventsOverride;
103 
104  auto entrySequencesOverride = Environment::Instance().getEntrySequencesOverride();
105  if (entrySequencesOverride.size() > 0)
106  m_entrySequences = entrySequencesOverride;
107 
108  m_nextEntry = m_skipNEvents;
109  m_lastPersistentEntry = -1;
110  m_lastParentFileLFN = "";
111 
112  const vector<string>& inputFiles = getFileNames();
113  if (inputFiles.empty()) {
114  B2FATAL("You have to set either the 'inputFileName' or the 'inputFileNames' parameter, or start basf2 with the '-i MyFile.root' option.");
115  }
116  if (!m_inputFileName.empty() && !m_inputFileNames.empty()) {
117  B2FATAL("Cannot use both 'inputFileName' and 'inputFileNames' parameters!");
118  }
119  m_inputFileNames = expandWordExpansions(inputFiles);
120  if (m_inputFileNames.empty()) {
121  B2FATAL("No valid files specified!");
122  }
123 
124  if (m_entrySequences.size() > 0 and m_inputFileNames.size() != m_entrySequences.size()) {
125  B2FATAL("Number of provided filenames does not match the number of given entrySequences parameters: len(inputFileNames) = "
126  << m_inputFileNames.size() << " len(entrySequences) = " << m_entrySequences.size());
127  }
128 
129  m_inputFileName = "";
130  // we'll only use m_inputFileNames from now on
131 
132  // so let's create the chain objects ...
133  m_persistent = new TChain(c_treeNames[DataStore::c_Persistent].c_str());
134  m_tree = new TChain(c_treeNames[DataStore::c_Event].c_str());
135 
136  // time for sanity checks. The problem is that this needs to read a few bytes
137  // from every input file so for jobs with large amount of input files this
138  // will not be efficient.
139  // TODO: We might want to create a different input module which will not
140  // check anything and require manual input like the number of events in
141  // each file and the global tags to use. That would be more efficient
142  // but also less safe
143 
144  // list of required branches. We keep this empty for now and only fill
145  // it after we checked the first file to make sure all other files have the
146  // same branches available.
147  std::set<std::string> requiredEventBranches;
148  std::set<std::string> requiredPersistentBranches;
149  // Event metadata from all files, keep it around for sanity checks and globaltag replay
150  std::vector<FileMetaData> fileMetaData;
151  // and if so, what is the sum
152  std::result_of<decltype(&FileMetaData::getMcEvents)(FileMetaData)>::type sumInputMCEvents{0};
153 
154  // scope for local variables
155  {
156  // temporarily disable some root warnings
157  auto rootWarningGuard = ScopeGuard::guardValue(gErrorIgnoreLevel, kWarning + 1);
158  // do all files have a consistent number of MC events? that is all positive or all zero
159  bool validInputMCEvents{true};
160  for (const string& fileName : m_inputFileNames) {
161  // read metadata and create sum of MCEvents and global tags
162  try {
163  RootIOUtilities::RootFileInfo fileInfo(fileName);
164  FileMetaData meta = fileInfo.getFileMetaData();
165  if (meta.getNEvents() == 0) {
166  B2WARNING("File appears to be empty, skipping" << LogVar("filename", fileName));
167  continue;
168  }
169  realDataWorkaround(meta);
170  fileMetaData.push_back(meta);
171  // make sure we only look at data or MC. For the first file this is trivially true
172  if (fileMetaData.front().isMC() != meta.isMC()) {
173  throw std::runtime_error("Mixing real data and simulated data for input files is not supported");
174  }
175  // accumulate number of inputMCEvents now
176  if (validInputMCEvents) {
177  // make sure that all files have either a non-zero or zero mcevents.
178  if ((sumInputMCEvents > 0 and meta.getMcEvents() == 0)) {
179  B2WARNING("inconsistent input files: zero mcEvents, setting total number of MC events to zero" << LogVar("filename", fileName));
180  validInputMCEvents = false;
181  }
182  // So accumulate the number of MCEvents but let's be careful to not have an overflow here
183  if (__builtin_add_overflow(sumInputMCEvents, meta.getMcEvents(), &sumInputMCEvents)) {
184  B2FATAL("Number of MC events is too large and cannot be represented anymore");
185  }
186  }
187  // for the first file we don't know what branches are required but now we can determine them as we know the file can be opened
188  if (requiredEventBranches.empty()) {
189  // make sure we have event meta data
190  fileInfo.checkMissingBranches({"EventMetaData"}, false);
191  requiredEventBranches = fileInfo.getBranchNames(false);
192  // filter the branches depending on what the user selected. Note we
193  // do the same thing again in connectBranches but we leave it like
194  // that because we also want to read branches from parent files
195  // selectively and thus we need to filter the branches there anyway.
196  // Here we just do it to ensure all files we read directly (which is
197  // 99% of the use case) contain all the branches we want.
198  requiredEventBranches = RootIOUtilities::filterBranches(requiredEventBranches, m_branchNames[DataStore::c_Event],
199  m_excludeBranchNames[DataStore::c_Event], DataStore::c_Event);
200  // but make sure we always have EventMetaData ...
201  requiredEventBranches.emplace("EventMetaData");
202 
203  // Same for persistent data ...
204  requiredPersistentBranches = fileInfo.getBranchNames(true);
205  // filter the branches depending on what the user selected
206  requiredPersistentBranches = RootIOUtilities::filterBranches(requiredPersistentBranches, m_branchNames[DataStore::c_Persistent],
207  m_excludeBranchNames[DataStore::c_Persistent], DataStore::c_Persistent);
208  } else {
209  // ok we already have the list ... so let's make sure following files have the same branches
210  fileInfo.checkMissingBranches(requiredEventBranches, false);
211  fileInfo.checkMissingBranches(requiredPersistentBranches, true);
212  }
213  // ok, so now we have the file, add it to the chain. We trust the amount of events from metadata here.
214  if (m_tree->AddFile(fileName.c_str(), meta.getNEvents()) == 0 || m_persistent->AddFile(fileName.c_str(), 1) == 0) {
215  throw std::runtime_error("Could not add file to TChain");
216  }
217  B2INFO("Added file " + fileName);
218  } catch (std::exception& e) {
219  B2FATAL("Could not open input file " << std::quoted(fileName) << ": " << e.what());
220  }
221  }
222  }
223 
224  if (m_tree->GetNtrees() == 0) B2FATAL("No file could be opened, aborting");
225  // Set cache size TODO: find out if files are remote and use a bigger default
226  // value if at least one file is non-local
227  if (m_cacheSize >= 0) m_tree->SetCacheSize(m_cacheSize * 1024 * 1024);
228 
229  // Check if the files we added to the Chain are unique,
230  // if the same file is added multiple times the TEventList used for the eventSequence feature
231  // will process each file only once with the union of both given sequences.
232  // It is not clear if the user wants this, so we raise a fatal in this situation.
233  {
234  std::set<std::string> unique_filenames;
235 
236  // The following lines are directly from the ROOT documentation
237  // see TChain::AddFile
238  TObjArray* fileElements = m_tree->GetListOfFiles();
239  TIter next(fileElements);
240  TChainElement* chEl = nullptr;
241  while ((chEl = (TChainElement*)next())) {
242  if (!unique_filenames.insert(chEl->GetTitle()).second) {
243  B2WARNING("The input file '" << chEl->GetTitle() << "' was specified more than once");
244  // seems we have duplicate files so we process events more than once. Disable forwarding of MC event number
245  m_processingAllEvents = false;
246  }
247  }
248  if ((unsigned int)m_tree->GetNtrees() != unique_filenames.size() && m_entrySequences.size() > 0) {
249  B2FATAL("You specified a file multiple times, and specified a sequence of entries which should be used for each file. "
250  "Please specify each file only once if you're using the sequence feature!");
251  }
252  }
253 
254  if (m_entrySequences.size() > 0) {
255  auto* elist = new TEventList("input_event_list");
256  for (unsigned int iFile = 0; iFile < m_entrySequences.size(); ++iFile) {
257  int64_t offset = m_tree->GetTreeOffset()[iFile];
258  int64_t next_offset = m_tree->GetTreeOffset()[iFile + 1];
259  // check if Sequence consists only of ':', e.g. the whole file is requested
260  if (m_entrySequences[iFile] == ":") {
261  for (int64_t global_entry = offset; global_entry < next_offset; ++global_entry)
262  elist->Enter(global_entry);
263  } else {
264  for (const auto& entry : generate_number_sequence(m_entrySequences[iFile])) {
265  int64_t global_entry = entry + offset;
266  if (global_entry >= next_offset) {
267  B2WARNING("Given sequence contains entry numbers which are out of range. "
268  "I won't add any further events to the EventList for the current file.");
269  break;
270  } else {
271  elist->Enter(global_entry);
272  }
273  }
274  }
275  }
276  m_tree->SetEventList(elist);
277  }
278 
279  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Persistent] + "'" << LogVar("entries", m_persistent->GetEntriesFast()));
280  B2DEBUG(33, "Opened tree '" + c_treeNames[DataStore::c_Event] + "'" << LogVar("entries", m_tree->GetEntriesFast()));
281 
282  connectBranches(m_persistent, DataStore::c_Persistent, &m_persistentStoreEntries);
283  readPersistentEntry(0);
284 
285  if (!connectBranches(m_tree, DataStore::c_Event, &m_storeEntries)) {
286  delete m_tree;
287  m_tree = nullptr; //don't try to read from there
288  } else {
289  InputController::setCanControlInput(true);
290  InputController::setChain(m_tree);
291  }
292 
293  if (m_parentLevel > 0) {
294  createParentStoreEntries();
295  } else if (m_parentLevel < 0) {
296  B2ERROR("parentLevel must be >= 0!");
297  return;
298  }
299 
300  // Let's check check if we process everything
301  // * all filenames unique (already done above)
302  // * no event skipping either with skipN, entry sequences or skipToEvent
303  // * no -n or process(path, N) with N <= the number of entries in our files
304  unsigned int maxEvent = Environment::Instance().getNumberEventsOverride();
305  m_processingAllEvents &= m_skipNEvents == 0 && m_entrySequences.size() == 0;
306  m_processingAllEvents &= (maxEvent == 0 || maxEvent >= InputController::numEntries());
307 
308  if (!m_skipToEvent.empty()) {
309  // Skipping to some specific event is also not processing all events ...
310  m_processingAllEvents = false;
311  // make sure the number of entries is exactly 3
312  if (m_skipToEvent.size() != 3) {
313  B2ERROR("skipToEvent must be a list of three values: experiment, run, event number");
314  // ignore the value
315  m_skipToEvent.clear();
316  } else {
317  InputController::setNextEntry(m_skipToEvent[0], m_skipToEvent[1], m_skipToEvent[2]);
318  }
319  if (m_nextEntry > 0) {
320  B2ERROR("You cannot supply a number of events to skip (skipNEvents) and an "
321  "event to skip to (skipToEvent) at the same time, ignoring skipNEvents");
322  //force the number of skipped events to be zero
323  m_nextEntry = 0;
324  }
325  }
326 
327  // Processing everything so forward number of MC events
328  if (m_processingAllEvents) {
329  Environment::Instance().setNumberOfMCEvents(sumInputMCEvents);
330  }
331  // And setup global tag replay ...
332  Conditions::Configuration::getInstance().setInputMetadata(fileMetaData);
333 }
334 
335 
336 void RootInputModule::event()
337 {
338  if (!m_tree)
339  return;
340 
341  while (true) {
342  const long nextEntry = InputController::getNextEntry();
343  if (nextEntry >= 0 && nextEntry < InputController::numEntries()) {
344  B2INFO("RootInput: will read entry " << nextEntry << " next.");
345  m_nextEntry = nextEntry;
346  } else if (InputController::getNextExperiment() >= 0 && InputController::getNextRun() >= 0
347  && InputController::getNextEvent() >= 0) {
348  const long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(m_tree->GetTree(), InputController::getNextEvent(),
349  InputController::getNextRun(), InputController::getNextExperiment());
350  if (entry >= 0) {
351  const long chainentry = m_tree->GetChainEntryNumber(entry);
352  B2INFO("RootInput: will read entry " << chainentry << " (entry " << entry << " in current file) next.");
353  m_nextEntry = chainentry;
354  } else {
355  B2ERROR("Couldn't find entry (" << InputController::getNextEvent() << ", " << InputController::getNextRun() << ", " <<
356  InputController::getNextExperiment() << ") in file! Loading entry " << m_nextEntry << " instead.");
357  }
358  }
359  InputController::eventLoaded(m_nextEntry);
360 
361  readTree();
362  m_nextEntry++;
363 
364  // check for events with errors
365  unsigned int errorFlag = 0;
366  if (m_discardErrorEvents && (m_nextEntry >= 0)) {
367  const StoreObjPtr<EventMetaData> eventMetaData;
368  errorFlag = eventMetaData->getErrorFlag();
369  if (errorFlag != 0) {
370  if (errorFlag & ~m_discardErrorMask) {
371  B2WARNING("Discarding corrupted event" << LogVar("errorFlag", errorFlag) << LogVar("experiment", eventMetaData->getExperiment())
372  << LogVar("run", eventMetaData->getRun()) << LogVar("event", eventMetaData->getEvent()));
373  }
374  // make sure this event is not used if it's the last one in the file
375  eventMetaData->setEndOfData();
376  }
377  }
378  if (errorFlag == 0) break;
379  }
380 }
381 
382 
383 void RootInputModule::terminate()
384 {
385  if (m_collectStatistics and m_tree) {
386  //add stats for last file
387  m_readStats.addFromFile(m_tree->GetFile());
388  }
389  delete m_tree;
390  delete m_persistent;
391  ReadStats parentReadStats;
392  for (const auto& entry : m_parentTrees) {
393  TFile* f = entry.second->GetCurrentFile();
394  if (m_collectStatistics)
395  parentReadStats.addFromFile(f);
396 
397  delete f;
398  }
399 
400  if (m_collectStatistics) {
401  B2INFO("Statistics for event tree: " << m_readStats.getString());
402  B2INFO("Statistics for event tree (parent files): " << parentReadStats.getString());
403  }
404 
405  for (auto& branch : m_connectedBranches) {
406  branch.clear();
407  }
408  m_storeEntries.clear();
409  m_persistentStoreEntries.clear();
410  m_parentStoreEntries.clear();
411  m_parentTrees.clear();
412 }
413 
414 
415 void RootInputModule::readTree()
416 {
417  if (!m_tree)
418  return;
419 
420  //keep snapshot of TFile stats (to use if it changes)
421  ReadStats currentEventStats;
422  if (m_collectStatistics) {
423  currentEventStats.addFromFile(m_tree->GetFile());
424  }
425 
426  // Check if there are still new entries available.
427  int localEntryNumber = m_nextEntry;
428  if (m_entrySequences.size() > 0) {
429  localEntryNumber = m_tree->GetEntryNumber(localEntryNumber);
430  }
431  localEntryNumber = m_tree->LoadTree(localEntryNumber);
432 
433  if (localEntryNumber == -2) {
434  m_nextEntry = -2;
435  return; //end of file
436  } else if (localEntryNumber < 0) {
437  B2FATAL("Failed to load tree, corrupt file? Check standard error for additional messages. (TChain::LoadTree() returned error " <<
438  localEntryNumber << ")");
439  }
440  B2DEBUG(39, "Reading file entry " << m_nextEntry);
441 
442  //Make sure transient members of objects are reinitialised
443  for (auto entry : m_storeEntries) {
444  entry->resetForGetEntry();
445  }
446  for (const auto& storeEntries : m_parentStoreEntries) {
447  for (auto entry : storeEntries) {
448  entry->resetForGetEntry();
449  }
450  }
451 
452  int bytesRead = m_tree->GetTree()->GetEntry(localEntryNumber);
453  if (bytesRead <= 0) {
454  B2FATAL("Could not read 'tree' entry " << m_nextEntry << " in file " << m_tree->GetCurrentFile()->GetName());
455  }
456 
457  //In case someone is tempted to change this:
458  // TTree::GetCurrentFile() returns a TFile pointer to a fixed location,
459  // calling GetName() on the TFile almost works as expected, but starts with the
460  // last file in a TChain. (-> we re-read the first persistent tree with TChain,
461  // with ill results for Mergeable objects.)
462  // GetTreeNumber() also starts at the last entry before we read the first event from m_tree,
463  // so we'll save the last persistent tree loaded and only reload on changes.
464  StoreObjPtr<FileMetaData> fileMetaData("", DataStore::c_Persistent);
465  const long treeNum = m_tree->GetTreeNumber();
466  const bool fileChanged = (m_lastPersistentEntry != treeNum);
467  if (fileChanged) {
468  if (m_collectStatistics) {
469  m_readStats.add(currentEventStats);
470  }
471  // file changed, read the FileMetaData object from the persistent tree and update the parent file metadata
472  readPersistentEntry(treeNum);
473  B2INFO("Loading new input file"
474  << LogVar("filename", m_tree->GetFile()->GetName())
475  << LogVar("metadata LFN", fileMetaData->getLfn()));
476  }
477  realDataWorkaround(*fileMetaData);
478 
479  for (auto entry : m_storeEntries) {
480  if (!entry->object) {
481  entryNotFound("Event durability tree (global entry: " + std::to_string(m_nextEntry) + ")", entry->name, fileChanged);
482  entry->recoverFromNullObject();
483  entry->ptr = nullptr;
484  } else {
485  entry->ptr = entry->object;
486  }
487  }
488 
489  if (m_parentLevel > 0) {
490  if (!readParentTrees())
491  B2FATAL("Could not read data from parent file!");
492  }
493 
494 }
495 
496 bool RootInputModule::connectBranches(TTree* tree, DataStore::EDurability durability, StoreEntries* storeEntries)
497 {
498  B2DEBUG(30, "File changed, loading persistent data.");
499  DataStore::StoreEntryMap& map = DataStore::Instance().getStoreEntryMap(durability);
500  //Go over the branchlist and connect the branches with DataStore entries
501  const TObjArray* branchesObjArray = tree->GetListOfBranches();
502  if (!branchesObjArray) {
503  B2FATAL("Tree '" << tree->GetName() << "' doesn't contain any branches!");
504  }
505  std::vector<TBranch*> branches;
506  set<string> branchList;
507  for (int jj = 0; jj < branchesObjArray->GetEntriesFast(); jj++) {
508  auto* branch = static_cast<TBranch*>(branchesObjArray->At(jj));
509  if (!branch) continue;
510  branchList.insert(branch->GetName());
511  branches.emplace_back(branch);
512  // start with all branches disabled and only enable what we read
513  setBranchStatus(branch, false);
514  }
515  //skip branches the user doesn't want
516  branchList = filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability, true);
517  for (TBranch* branch : branches) {
518  const std::string branchName = branch->GetName();
519  //skip already connected branches
520  if (m_connectedBranches[durability].find(branchName) != m_connectedBranches[durability].end())
521  continue;
522 
523  if ((branchList.count(branchName) == 0) and
524  ((branchName != "FileMetaData") || (tree != m_persistent)) and
525  ((branchName != "EventMetaData") || (tree != m_tree))) {
526  continue;
527  }
528  auto found = setBranchStatus(branch, true);
529  B2DEBUG(32, "Enabling branch" << LogVar("branchName", branchName)
530  << LogVar("children found", found));
531 
532  //Get information about the object in the branch
533  TObject* objectPtr = nullptr;
534  branch->SetAddress(&objectPtr);
535  branch->GetEntry();
536  bool array = (string(branch->GetClassName()) == "TClonesArray");
537  TClass* objClass = nullptr;
538  if (array)
539  objClass = (static_cast<TClonesArray*>(objectPtr))->GetClass();
540  else
541  objClass = objectPtr->IsA();
542  delete objectPtr;
543 
544  //Create a DataStore entry and connect the branch address to it
545  if (!DataStore::Instance().registerEntry(branchName, durability, objClass, array, DataStore::c_WriteOut)) {
546  B2FATAL("Cannot connect branch to datastore" << LogVar("branchName", branchName));
547  continue;
548  }
549  DataStore::StoreEntry& entry = (map.find(branchName))->second;
550  tree->SetBranchAddress(branch->GetName(), &(entry.object));
551  if (storeEntries) storeEntries->push_back(&entry);
552 
553  //Keep track of already connected branches
554  m_connectedBranches[durability].insert(branchName);
555  }
556 
557  return true;
558 }
559 
560 
561 bool RootInputModule::createParentStoreEntries()
562 {
563  // get the experiment/run/event number and parentLfn of the first entry
564  TBranch* branch = m_tree->GetBranch("EventMetaData");
565  char* address = branch->GetAddress();
566  EventMetaData* eventMetaData = nullptr;
567  branch->SetAddress(&eventMetaData);
568  branch->GetEntry(0);
569  int experiment = eventMetaData->getExperiment();
570  int run = eventMetaData->getRun();
571  unsigned int event = eventMetaData->getEvent();
572  std::string parentLfn = eventMetaData->getParentLfn();
573  branch->SetAddress(address);
574 
575  // loop over parents and get their metadata
576  for (int level = 0; level < m_parentLevel; level++) {
577  // open the parent file
578  TDirectory* dir = gDirectory;
579  const std::string parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
580  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
581  dir->cd();
582  if (!file || !file->IsOpen()) {
583  B2ERROR("Couldn't open parent file. Maybe you need to create a file catalog using b2file-catalog-add?"
584  << LogVar("LFN", parentLfn) << LogVar("PFN", parentPfn));
585  return false;
586  }
587 
588  // get the event tree and connect its branches
589  auto* tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
590  if (!tree) {
591  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
592  return false;
593  }
594  if (int(m_parentStoreEntries.size()) <= level) m_parentStoreEntries.resize(level + 1);
595  connectBranches(tree, DataStore::c_Event, &m_parentStoreEntries[level]);
596  m_parentTrees.insert(std::make_pair(parentLfn, tree));
597 
598  // get the persistent tree and read its branches
599  auto* persistent = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Persistent].c_str()));
600  if (!persistent) {
601  B2ERROR("No tree " << c_treeNames[DataStore::c_Persistent] << " found in " << parentPfn);
602  return false;
603  }
604  connectBranches(persistent, DataStore::c_Persistent, nullptr);
605 
606  // get parent LFN of parent
607  EventMetaData* metaData = nullptr;
608  tree->SetBranchAddress("EventMetaData", &metaData);
609  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
610  tree->GetBranch("EventMetaData")->GetEntry(entry);
611  parentLfn = metaData->getParentLfn();
612  }
613 
614  return true;
615 }
616 
617 
618 bool RootInputModule::readParentTrees()
619 {
620  const StoreObjPtr<EventMetaData> eventMetaData;
621  int experiment = eventMetaData->getExperiment();
622  int run = eventMetaData->getRun();
623  unsigned int event = eventMetaData->getEvent();
624 
625  std::string parentLfn = eventMetaData->getParentLfn();
626  for (int level = 0; level < m_parentLevel; level++) {
627  const std::string& parentPfn = FileCatalog::Instance().getPhysicalFileName(parentLfn);
628 
629  // Open the parent file if we haven't done this already
630  TTree* tree = nullptr;
631  if (m_parentTrees.find(parentLfn) == m_parentTrees.end()) {
632  TDirectory* dir = gDirectory;
633  B2DEBUG(30, "Opening parent file" << LogVar("PFN", parentPfn));
634  TFile* file = TFile::Open(parentPfn.c_str(), "READ");
635  dir->cd();
636  if (!file || !file->IsOpen()) {
637  B2ERROR("Couldn't open parent file " << parentPfn);
638  return false;
639  }
640  tree = dynamic_cast<TTree*>(file->Get(c_treeNames[DataStore::c_Event].c_str()));
641  if (!tree) {
642  B2ERROR("No tree " << c_treeNames[DataStore::c_Event] << " found in " << parentPfn);
643  return false;
644  }
645  for (auto entry : m_parentStoreEntries[level]) {
646  tree->SetBranchAddress(entry->name.c_str(), &(entry->object));
647  }
648  m_parentTrees.insert(std::make_pair(parentLfn, tree));
649  } else {
650  tree = m_parentTrees[parentLfn];
651  }
652 
653  // get entry number in parent tree
654  long entryNumber = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
655  if (entryNumber < 0) {
656  B2ERROR("No event " << experiment << "/" << run << "/" << event << " in parent file " << parentPfn);
657  return false;
658  }
659 
660  // read the tree and mark the data read in the data store
661  EventMetaData* parentMetaData = nullptr;
662  tree->SetBranchAddress("EventMetaData", &parentMetaData);
663  tree->GetEntry(entryNumber);
664  for (auto entry : m_parentStoreEntries[level]) {
665  entry->ptr = entry->object;
666  }
667 
668  // set the parent LFN to the next level
669  parentLfn = parentMetaData->getParentLfn();
670  }
671 
672  addEventListForIndexFile(parentLfn);
673 
674  return true;
675 }
676 
677 void RootInputModule::addEventListForIndexFile(const std::string& parentLfn)
678 {
679  //is this really an index file? (=only EventMetaData stored)
680  if (!(m_parentLevel > 0 and m_storeEntries.size() == 1))
681  return;
682  //did we handle the current parent file already?
683  if (parentLfn == m_lastParentFileLFN)
684  return;
685  m_lastParentFileLFN = parentLfn;
686 
687  B2INFO("Index file detected, scanning to generate event list.");
688  TTree* tree = m_parentTrees.at(parentLfn);
689 
690  //both types of list work, TEventList seems to result in slightly less data being read.
691  auto* elist = new TEventList("parent_entrylist");
692  //TEntryListArray* elist = new TEntryListArray();
693 
694  TBranch* branch = m_tree->GetBranch("EventMetaData");
695  auto* address = branch->GetAddress();
696  EventMetaData* eventMetaData = nullptr;
697  branch->SetAddress(&eventMetaData);
698  long nEntries = m_tree->GetEntries();
699  for (long i = m_nextEntry; i < nEntries; i++) {
700  branch->GetEntry(i);
701  int experiment = eventMetaData->getExperiment();
702  int run = eventMetaData->getRun();
703  unsigned int event = eventMetaData->getEvent();
704  const std::string& newParentLfn = eventMetaData->getParentLfn();
705 
706  if (parentLfn != newParentLfn) {
707  //parent file changed, stopping for now
708  break;
709  }
710  long entry = RootIOUtilities::getEntryNumberWithEvtRunExp(tree, event, run, experiment);
711  elist->Enter(entry);
712  }
713  branch->SetAddress(address);
714 
715  if (tree) {
716  tree->SetEventList(elist);
717  //tree->SetEntryList(elist);
718  }
719 }
720 
721 void RootInputModule::entryNotFound(const std::string& entryOrigin, const std::string& name, bool fileChanged)
722 {
723  if (name == "ProcessStatistics" or DataStore::Instance().getDependencyMap().isUsedAs(name, DependencyMap::c_Input)) {
724  B2FATAL(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain required object " << name << ", aborting.");
725  } else if (fileChanged) {
726  B2WARNING(entryOrigin << " in " << m_tree->GetFile()->GetName() << " does not contain object " << name <<
727  " that was present in a previous entry.");
728  }
729 }
730 
731 void RootInputModule::readPersistentEntry(long fileEntry)
732 {
733  m_lastPersistentEntry = fileEntry;
734 
735  for (auto entry : m_persistentStoreEntries) {
736  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
737  TObject* copyOfPreviousVersion = nullptr;
738  if (isMergeable) {
739  copyOfPreviousVersion = entry->object->Clone();
740  }
741  entry->resetForGetEntry();
742  //ptr stores old value (or nullptr)
743  entry->ptr = copyOfPreviousVersion;
744  }
745 
746  int bytesRead = m_persistent->GetEntry(fileEntry);
747  if (bytesRead <= 0) {
748  const char* name = m_tree->GetCurrentFile() ? m_tree->GetCurrentFile()->GetName() : "<unknown>";
749  B2FATAL("Could not read 'persistent' TTree #" << fileEntry << " in file " << name);
750  }
751 
752  for (auto entry : m_persistentStoreEntries) {
753  if (entry->object) {
754  bool isMergeable = entry->object->InheritsFrom(Mergeable::Class());
755  if (isMergeable) {
756  const Mergeable* oldObj = static_cast<Mergeable*>(entry->ptr);
757  auto* newObj = static_cast<Mergeable*>(entry->object);
758  newObj->merge(oldObj);
759 
760  delete entry->ptr;
761  }
762  entry->ptr = entry->object;
763  } else {
764  entryNotFound("Persistent tree", entry->name);
765  entry->recoverFromNullObject();
766  entry->ptr = nullptr;
767  }
768  }
769 }
770 
771 void RootInputModule::realDataWorkaround(FileMetaData& metaData)
772 {
773  if ((metaData.getSite().find("bfe0") == 0) && (metaData.getDate().compare("2019-06-30") < 0) &&
774  (metaData.getExperimentLow() > 0) && (metaData.getExperimentHigh() < 9) && (metaData.getRunLow() > 0)) {
775  metaData.declareRealData();
776  }
777 }
Belle2::RootIOUtilities::RootFileInfo::checkMissingBranches
void checkMissingBranches(const std::set< std::string > &required, bool persistent=false)
Check if the event or persistent tree contain at least all the branches in the set of required branch...
Definition: RootFileInfo.cc:85
Belle2::StoreEntry::object
TObject * object
The pointer to the actual object.
Definition: StoreEntry.h:41
Belle2::RootInputModule::StoreEntries
std::vector< DataStore::StoreEntry * > StoreEntries
Vector of entries in the data store.
Definition: RootInputModule.h:88
Belle2::FileMetaData::getSite
const std::string & getSite() const
Site where the file was created getter.
Definition: FileMetaData.h:105
Belle2::EventMetaData::getEvent
unsigned int getEvent() const
Event Getter.
Definition: EventMetaData.h:155
Belle2::FileMetaData::declareRealData
void declareRealData()
Declare that this is not generated, but real data.
Definition: FileMetaData.h:292
Belle2::RootIOUtilities::c_SteerBranchNames
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
Definition: RootIOUtilities.cc:21
Belle2::DataStore::StoreEntryMap
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Definition: DataStore.h:89
REG_MODULE
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:652
Belle2::generate_number_sequence
std::set< int64_t > generate_number_sequence(const std::string &str)
Generate a sequence of numbers defined by a string.
Definition: NumberSequence.cc:30
Belle2::FileMetaData::getExperimentLow
int getExperimentLow() const
Lowest experiment number getter.
Definition: FileMetaData.h:55
Belle2::StoreEntry
Wraps a stored array/object, stored under unique (name, durability) key.
Definition: StoreEntry.h:15
Belle2::RootIOUtilities::c_SteerExcludeBranchNames
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
Definition: RootIOUtilities.cc:22
Belle2::RootIOUtilities::c_treeNames
const std::string c_treeNames[]
Names of trees.
Definition: RootIOUtilities.cc:20
Belle2::EventMetaData::getParentLfn
const std::string & getParentLfn() const
Return LFN of the current parent file, or an empty string if not set.
Definition: EventMetaData.h:190
Belle2::FileMetaData
Metadata information about a file.
Definition: FileMetaData.h:39
Belle2::RootInputModule::ReadStats
for collecting statistics over multiple files.
Definition: RootInputModule.h:197
Belle2::Module
Base class for Modules.
Definition: Module.h:74
Belle2::FileMetaData::getExperimentHigh
int getExperimentHigh() const
Highest experiment number getter.
Definition: FileMetaData.h:67
Belle2::ProcType::c_Input
@ c_Input
Input Process.
Belle2::RootIOUtilities::RootFileInfo::getFileMetaData
const FileMetaData & getFileMetaData()
Return the event metadata from the file.
Definition: RootFileInfo.cc:51
Belle2::RootIOUtilities::RootFileInfo
Helper class to factorize some necessary tasks when working with Belle2 output files.
Definition: RootFileInfo.h:28
Belle2::FileMetaData::getRunLow
int getRunLow() const
Lowest run number getter.
Definition: FileMetaData.h:59
Belle2::RootInputModule
Module to read TTree data from file into the data store.
Definition: RootInputModule.h:52
Belle2::RootIOUtilities::expandWordExpansions
std::vector< std::string > expandWordExpansions(const std::vector< std::string > &filenames)
Performs wildcard expansion using wordexp(), returns matches.
Definition: RootIOUtilities.cc:107
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::StoreObjPtr
Type-safe access to single objects in the data store.
Definition: ParticleList.h:33
Belle2::RootIOUtilities::filterBranches
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
Definition: RootIOUtilities.cc:25
LogVar
Class to store variables with their name which were sent to the logging service.
Definition: LogVariableStream.h:24
Belle2::RootIOUtilities::RootFileInfo::getBranchNames
const std::set< std::string > & getBranchNames(bool persistent=false)
Return a set of branch names for either the event or the persistent tree.
Definition: RootFileInfo.cc:65
Belle2::EventMetaData::getExperiment
int getExperiment() const
Experiment Getter.
Definition: EventMetaData.h:174
Belle2::RootIOUtilities::setBranchStatus
size_t setBranchStatus(TBranch *branch, bool process)
Set Branch to be read or not.
Definition: RootIOUtilities.cc:85
Belle2::EventMetaData::getRun
int getRun() const
Run Getter.
Definition: EventMetaData.h:161
Belle2::EventMetaData
Store event, run, and experiment numbers.
Definition: EventMetaData.h:43
Belle2::RootInputModule::ReadStats::addFromFile
void addFromFile(const TFile *f)
add current statistics from TFile object.
Definition: RootInputModule.h:209
Belle2::Mergeable::merge
virtual void merge(const Mergeable *other)=0
Merge object 'other' into this one.
Belle2::FileMetaData::getDate
const std::string & getDate() const
File creation date and time getter (UTC)
Definition: FileMetaData.h:101
Belle2::RootInputModule::ReadStats::getString
std::string getString() const
string suitable for printing.
Definition: RootInputModule.h:216
Belle2::DataStore::EDurability
EDurability
Durability types.
Definition: DataStore.h:60
Belle2::Mergeable
Abstract base class for objects that can be merged.
Definition: Mergeable.h:33