11 #include <boost/python.hpp>
13 #include <framework/modules/rootio/RootOutputModule.h>
15 #include <framework/io/RootIOUtilities.h>
16 #include <framework/core/FileCatalog.h>
17 #include <framework/core/MetadataService.h>
18 #include <framework/core/RandomNumbers.h>
19 #include <framework/database/Database.h>
21 #include <framework/core/ModuleParam.templateDetails.h>
22 #include <framework/utilities/EnvironmentVariables.h>
24 #include <boost/filesystem/path.hpp>
25 #include <boost/filesystem/operations.hpp>
26 #include <boost/format.hpp>
27 #include <boost/algorithm/string.hpp>
29 #include <TClonesArray.h>
31 #include <nlohmann/json.hpp>
39 using namespace RootIOUtilities;
// Constructor of RootOutputModule: tail of the member initialiser list followed by
// the module description and the declaration of all steering parameters.
// NOTE(review): the constructor header and several default-value lines are not part
// of this view; only the visible lines are reproduced here.
51 m_eventLow(0), m_experimentHigh(0), m_runHigh(0), m_eventHigh(0)
// Module description and property flags.
54 setDescription(
"Writes DataStore objects into a .root file. Data is stored in a TTree 'tree' for event-dependent and in 'persistent' for persistent data. You can use RootInput to read the files back into basf2.");
55 setPropertyFlags(c_Output);
// Output file name and compression settings.
58 addParam(
"outputFileName" , m_outputFileName,
"Name of the output file. Can be overridden using the -o argument to basf2.",
59 string(
"RootOutput.root"));
60 addParam(
"ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
61 "Ignore override of file name via command line argument -o. Useful if you have multiple output modules in one path.",
false);
62 addParam(
"compressionLevel", m_compressionLevel,
63 "0 for no, 1 for low, 9 for high compression. Level 1 usually reduces size by >50%, higher levels have no noticeable effect. On typical hard disks, disabling compression reduces write time by 10-20 %, but almost doubles read time, so you probably should leave this turned on.",
65 addParam(
"compressionAlgorithm", m_compressionAlgorithm,
66 "Set the Compression algorithm. Recommended values are 0 for default, 1 for zlib and 4 for lz4\n\n"
67 ".. versionadded:: release-03-00-00" , m_compressionAlgorithm);
68 addParam(
"splitLevel", m_splitLevel,
69 "Branch split level: determines up to which depth object members will be saved in separate sub-branches in the tree. For arrays or objects with custom streamers, -1 is used instead to ensure the streamers are used. The default (99) usually gives the highest read performance with RootInput.",
// File catalog handling; the description is reStructuredText shown in the module documentation.
71 addParam(
"updateFileCatalog", m_updateFileCatalog, R
"DOC(
72 Flag that specifies whether the file metadata catalog is updated or created.
73 This is only necessary in special cases and can always be done afterwards using
74 ``b2file-catalog-add filename.root``
76 (You can also set the ``BELLE2_FILECATALOG`` environment variable to NONE to get
77 the same effect as setting this to false))DOC", false);
// Branch selection: index 0 carries the event durability lists, index 1 the
// persistent durability lists (as stated by the descriptions below).
79 vector<string> emptyvector;
80 addParam(c_SteerBranchNames[0], m_branchNames[0],
81 "Names of event durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (EventMetaData is always saved)",
83 addParam(c_SteerBranchNames[1], m_branchNames[1],
84 "Names of persistent durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (FileMetaData is always saved)",
86 addParam(c_SteerAdditionalBranchNames[0], m_additionalBranchNames[0],
87 "Add additional event branch names without the need to specify all branchnames.",
89 addParam(c_SteerAdditionalBranchNames[1], m_additionalBranchNames[1],
90 "Add additional persistent branch names without the need to specify all branchnames.",
92 addParam(c_SteerExcludeBranchNames[0], m_excludeBranchNames[0],
93 "Names of event durability branches NOT to be saved. Branches also in branchNames are not saved.", emptyvector);
94 addParam(c_SteerExcludeBranchNames[1], m_excludeBranchNames[1],
95 "Names of persistent durability branches NOT to be saved. Branches also in branchNamesPersistent are not saved.", emptyvector);
// TTree flushing, metadata saving and basket size.
96 addParam(
"autoFlushSize", m_autoflush,
97 "Value for TTree SetAutoFlush(): a positive value tells ROOT to flush all baskets to disk after n entries, a negative value to flush after -n bytes",
99 addParam(
"autoSaveSize", m_autosave,
100 "Value for TTree SetAutoSave(): a positive value tells ROOT to write the TTree metadata after n entries, a negative value to write the metadata after -n bytes",
102 addParam(
"basketSize", m_basketsize,
"Basketsize for Branches in the Tree in bytes", 32000);
// Extra metadata, event index and parent handling.
103 addParam(
"additionalDataDescription", m_additionalDataDescription,
"Additional dictionary of "
104 "name->value pairs to be added to the file metadata to describe the data",
105 m_additionalDataDescription);
106 addParam(
"buildIndex", m_buildIndex,
"Build Event Index for faster finding of events by exp/run/event number", m_buildIndex);
107 addParam(
"keepParents", m_keepParents,
"Keep parent files of input files, input files will not be added as output file's parents",
// Size-based splitting of the output; the description is reStructuredText.
109 addParam(
"outputSplitSize", m_outputSplitSize, R
"DOC(
110 If given split the output file once the file has reached the given size in MB.
111 If set the filename will end in ``.f{index:05d}.root``. So if for example
112 ``outputFileName`` is set to "RootOutput.root" then the files will be named
113 ``RootOutput.f00000.root``, ``RootOutput.f00001.root``,
114 ``RootOutput.f00002.root``, ...
116 All created output files are complete and independent files and can
117 subsequently be processed completely independently.
120 The output files will be approximately of the size given by
121 ``outputSplitSize`` but they will be slightly larger since
122 additional information has to be written at the end of the file. If necessary
123 please account for this. Also, using ``buildIndex=False`` might be beneficial
124 to reduce the overshoot.
127 This will set the amount of generated events stored in the file metadata to
128 zero as it is not possible to determine which fraction ends up in which
131 .. versionadded:: release-03-00-00
132 )DOC", m_outputSplitSize);
// Destructor is defaulted: no custom cleanup is declared at this point.
136 RootOutputModule::~RootOutputModule() = default;
// Initialisation step of the module.
// Visible actions: raise the ROOT tree size limit, register the file metadata in
// the data store, require the event metadata, validate and rescale the optional
// split size, and inspect the output file name for a protocol prefix.
// NOTE(review): brace-only lines and some conditions are not part of this view.
138 void RootOutputModule::initialize()
// 1000 * 1000 * 100000000000 = 1e17; presumably this keeps ROOT from switching
// to a new file on its own - confirm against the ROOT documentation.
142 TTree::SetMaxTreeSize(1000 * 1000 * 100000000000LL);
145 m_fileMetaData.registerInDataStore();
147 m_eventMetaData.isRequired();
// Only when a split size was given: zero is reported as an error, values of
// 1024*1024 or more trigger a units warning, and the value is then multiplied
// by 1024 * 1024 (the parameter is documented in MB, so this presumably
// converts it to bytes for the comparison with the file end position).
150 if (m_outputSplitSize) {
151 if (*m_outputSplitSize == 0) B2ERROR(
"outputSplitSize must be set to a positive value");
153 if (*m_outputSplitSize >= 1024*1024) B2WARNING(
"outputSplitSize set to " << *m_outputSplitSize <<
" MB, please make sure the units are correct");
155 *m_outputSplitSize *= 1024 * 1024;
// Look for a leading protocol prefix (letters, then a colon and two slashes)
// in the output file name.
// NOTE(review): the conditions selecting between the two statements below are
// not visible here; one removes the prefix from the name, the other marks the
// output as not being a regular file.
161 std::regex protocol(
"^([A-Za-z]*)://");
162 if(std::smatch m; std::regex_search(m_outputFileName, m, protocol)) {
165 m_outputFileName = std::regex_replace(m_outputFileName, protocol,
"");
168 m_regularFile =
false;
// Open the output TFile and set up the branches for every durability.
// When a split size is configured the file name gets a per-file index.
// NOTE(review): several statements (for example the declaration of map and the
// creation of the trees) and all brace-only lines are not part of this view.
174 void RootOutputModule::openFile()
// Remember the current ROOT directory; the code using dir later is not visible here.
176 TDirectory* dir = gDirectory;
177 boost::filesystem::path out{m_outputFileName};
// With splitting enabled, replace the extension by f<5-digit file index>.root.
// For regular files only file name and options are used, otherwise the full URL.
178 if (m_outputSplitSize) {
184 TUrl fileUrl(m_outputFileName.c_str(), m_regularFile);
185 boost::filesystem::path file{fileUrl.GetFile()};
186 file.replace_extension((boost::format(
"f%05d.root") % m_fileIndex).str());
187 fileUrl.SetFile(file.c_str());
189 out = m_regularFile? fileUrl.GetFileAndOptions() : fileUrl.GetUrl();
// First attempt to create (or overwrite) the output file.
191 m_file = TFile::Open(out.c_str(),
"RECREATE",
"basf2 Event File");
// If that failed for a regular file, try to create the parent directories and
// open the file again.
192 if ((!m_file || m_file->IsZombie()) && m_regularFile) {
194 auto dirpath = out.parent_path();
196 if (boost::filesystem::create_directories(dirpath)) {
197 B2INFO(
"Created missing directory " << dirpath <<
".");
199 m_file = TFile::Open(out.c_str(),
"RECREATE",
"basf2 Event File");
// Still no usable file: abort with a fatal error.
203 if (!m_file || m_file->IsZombie()) {
204 B2FATAL(
"Couldn't open file " << out <<
" for writing!");
206 m_file->SetCompressionAlgorithm(m_compressionAlgorithm);
207 m_file->SetCompressionLevel(m_compressionLevel);
// Per-durability setup of the tree and its branches.
209 for (
int durability = 0; durability < DataStore::c_NDurabilityTypes; durability++) {
// Start from all entry names found in map and apply the include/exclude lists.
211 set<string> branchList;
212 for (
const auto& pair : map)
213 branchList.insert(pair.first);
215 branchList =
filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability);
219 m_tree[durability]->SetAutoFlush(m_autoflush);
220 m_tree[durability]->SetAutoSave(m_autosave);
221 for (
auto & iter : map) {
222 const std::string& branchName = iter.first;
// Condition: entry is flagged dontWriteOut and is named neither in branchNames
// nor in additionalBranchNames. The guarded statement is not visible here.
224 if (iter.second.dontWriteOut
225 && find(m_branchNames[durability].begin(), m_branchNames[durability].end(), branchName) == m_branchNames[durability].end()
226 && find(m_additionalBranchNames[durability].begin(), m_additionalBranchNames[durability].end(),
227 branchName) == m_additionalBranchNames[durability].end())
// Entry is not in the filtered list. The nested check is false for FileMetaData
// outside the event durability and for EventMetaData outside the persistent
// durability; what happens when it is true is not visible here.
230 if (branchList.count(branchName) == 0) {
232 if (((branchName !=
"FileMetaData") || (durability == DataStore::c_Event)) &&
233 ((branchName !=
"EventMetaData") || (durability == DataStore::c_Persistent))) {
// Warn, for the first file only, about persistent branches other than
// FileMetaData and ProcessStatistics when the output is split by size.
239 if(durability == DataStore::c_Persistent and m_outputSplitSize and m_fileIndex==0 and
240 (branchName !=
"FileMetaData" and branchName !=
"ProcessStatistics")) {
241 B2WARNING(
"Persistent branches might not be stored as expected when splitting the output by size" <<
LogVar(
"branch", branchName));
244 TClass* entryClass = iter.second.objClass;
// Classes without a dictionary are reported; the warning is limited to the first file.
253 if (!entryClass->HasDictionary()) {
254 if (m_fileIndex == 0) {
255 B2WARNING(
"No dictionary found, object will not be saved (This is probably an obsolete class that is still present in the input file.)"
256 <<
LogVar(
"class", entryClass->GetName()) <<
LogVar(
"branch", branchName));
// NOTE(review): the condition guarding this error is not visible in this view.
262 B2ERROR(
"The version number in the ClassDef() macro must be at least 1 to enable I/O!" <<
LogVar(
"class", entryClass->GetName()));
// Default split level comes from the module parameter; the debug message refers
// to an override with -1 for custom streamers whose code is not visible here.
265 int splitLevel = m_splitLevel;
267 B2DEBUG(38,
"Class has custom streamer, setting split level -1 for this branch." <<
LogVar(
"class", entryClass->GetName()));
// Array entries are cast to TClonesArray and BypassStreamer(kFALSE) is called on them.
270 if (iter.second.isArray) {
272 static_cast<TClonesArray*
>(iter.second.object)->BypassStreamer(kFALSE);
// Create the branch, bind it to the entry object and remember the entry in
// m_entries for this durability.
275 m_tree[durability]->Branch(branchName.c_str(), &iter.second.object, m_basketsize, splitLevel);
276 m_tree[durability]->SetBranchAddress(branchName.c_str(), &iter.second.object);
277 m_entries[durability].push_back(&iter.second);
278 B2DEBUG(39,
"The branch " << branchName <<
" was created.");
281 if (m_fileIndex == 0) {
283 iter.second.isArray));
// When splitting, log each opened file; files after the first are called new.
289 if (m_outputSplitSize) {
290 B2INFO(getName() <<
": Opened " << (m_fileIndex > 0 ?
"new " :
"") <<
"file for writing" <<
LogVar(
"filename", out));
// Per-event processing: open the file on first use, fill the event tree, collect
// parent LFNs, track the lowest/highest experiment, run and event numbers, and
// check the split size limit.
// NOTE(review): else branches, some assignments and brace-only lines are not part
// of this view.
295 void RootOutputModule::event()
// The output file is opened lazily on the first call.
298 if (!m_file) openFile();
// Unless keepParents is set, copy the LFN from m_fileMetaData (if valid) to the
// event metadata as parent LFN.
300 if (!m_keepParents) {
301 if (m_fileMetaData) {
302 m_eventMetaData->setParentLfn(m_fileMetaData->getLfn());
307 fillTree(DataStore::c_Event);
// Collect LFNs for the parent list. Empty values are skipped and a value is only
// appended when it differs from the last stored one.
// NOTE(review): the condition choosing between the loop over the parents and the
// single getLfn() variant is not visible here - presumably keepParents.
309 if (m_fileMetaData) {
311 for (
int iparent = 0; iparent < m_fileMetaData->getNParents(); iparent++) {
312 string lfn = m_fileMetaData->getParent(iparent);
313 if (!lfn.empty() && (m_parentLfns.empty() || (m_parentLfns.back() != lfn))) {
314 m_parentLfns.push_back(lfn);
318 string lfn = m_fileMetaData->getLfn();
319 if (!lfn.empty() && (m_parentLfns.empty() || (m_parentLfns.back() != lfn))) {
320 m_parentLfns.push_back(lfn);
// Track the range of event numbers. A low experiment above the high experiment
// is treated as a not yet initialised range, so both bounds start at this event.
326 unsigned long experiment = m_eventMetaData->getExperiment();
327 unsigned long run = m_eventMetaData->getRun();
328 unsigned long event = m_eventMetaData->getEvent();
329 if (m_experimentLow > m_experimentHigh) {
330 m_experimentLow = m_experimentHigh = experiment;
331 m_runLow = m_runHigh = run;
332 m_eventLow = m_eventHigh = event;
// Ordering is by experiment first, then run, then event. Only the experiment
// assignments of the two updates are visible in this view.
334 if ((experiment < m_experimentLow) || ((experiment == m_experimentLow) && ((run < m_runLow) || ((run == m_runLow)
335 && (event < m_eventLow))))) {
336 m_experimentLow = experiment;
340 if ((experiment > m_experimentHigh) || ((experiment == m_experimentHigh) && ((run > m_runHigh) || ((run == m_runHigh)
341 && (event > m_eventHigh))))) {
342 m_experimentHigh = experiment;
// Size check after filling: the file end position is compared with the split
// size. The message announces closing the file; that code is not visible here.
349 if (m_outputSplitSize and (uint64_t)m_file->GetEND() > *m_outputSplitSize) {
351 B2INFO(getName() <<
": Output size limit reached, closing file ...");
// Fill the FileMetaData object describing the file being written and, if
// requested, register the file in the file catalog.
// NOTE(review): else branches, some conditions and brace-only lines are not part
// of this view.
356 void RootOutputModule::fillFileMetaData()
// Remember the MC flag of the current metadata (true when none is available)
// before calling create(true), then restore the real-data declaration if needed.
// NOTE(review): create(true) presumably replaces an existing object - confirm.
358 bool isMC = (m_fileMetaData) ? m_fileMetaData->isMC() :
true;
359 m_fileMetaData.create(
true);
360 if (!isMC) m_fileMetaData->declareRealData();
// Event tree bookkeeping: optional index and number of entries.
362 if (m_tree[DataStore::c_Event]) {
364 TTree* tree = m_tree[DataStore::c_Event];
365 unsigned long numEntries = tree->GetEntries();
// The index is skipped above 10000000 entries; the warning text gives the reason.
366 if (m_buildIndex && numEntries > 0) {
367 if (numEntries > 10000000) {
369 B2WARNING(
"Not building TTree index because of large number of events. The index object would conflict with ROOT limits on object size and cause problems.");
370 }
else if (tree->GetBranch(
"EventMetaData")) {
371 tree->SetBranchAddress(
"EventMetaData",
nullptr);
372 RootIOUtilities::buildIndex(tree);
376 m_fileMetaData->setNEvents(numEntries);
// An inverted low/high experiment range is handled by storing -1, -1, 0 for both
// bounds; the tracked range is stored by the other pair of calls.
377 if (m_experimentLow > m_experimentHigh) {
379 m_fileMetaData->setLow(-1, -1, 0);
380 m_fileMetaData->setHigh(-1, -1, 0);
382 m_fileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
383 m_fileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
// General provenance: parents, creation data, random seed, steering.
388 m_fileMetaData->setParents(m_parentLfns);
389 RootIOUtilities::setCreationData(*m_fileMetaData);
390 m_fileMetaData->setRandomSeed(RandomNumbers::getSeed());
391 m_fileMetaData->setSteering(Environment::Instance().getSteering());
// Number of MC events. When splitting, a warning is issued for the first file;
// the statement that sets the value to 0 is not visible in this view.
392 auto mcEvents = Environment::Instance().getNumberOfMCEvents();
393 if(m_outputSplitSize and mcEvents > 0) {
394 if(m_fileIndex == 0) B2WARNING(
"Number of MC Events cannot be saved when splitting output files by size, setting to 0");
397 m_fileMetaData->setMcEvents(mcEvents);
398 m_fileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
399 for (
const auto& item : m_additionalDataDescription) {
400 m_fileMetaData->setDataDescription(item.first, item.second);
// The LFN starts as the file name and is made absolute relative to the initial
// path. NOTE(review): a condition around the absolute() call may be missing here.
403 std::string lfn = m_file->GetName();
405 lfn = boost::filesystem::absolute(lfn, boost::filesystem::initial_path()).string();
// If BELLE2_LFN_FORMATSTRING is set, the Python function
// B2Tools.format.format_filename computes the LFN from the format string, the
// output file name and the metadata JSON string.
408 std::string format = EnvironmentVariables::get(
"BELLE2_LFN_FORMATSTRING",
"");
409 if (!format.empty()) {
410 auto format_filename = boost::python::import(
"B2Tools.format").attr(
"format_filename");
411 lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_fileMetaData->getJsonStr()));
413 m_fileMetaData->setLfn(lfn);
// Optional catalog registration, then keep a copy of the final metadata.
415 if (m_updateFileCatalog) {
416 FileCatalog::Instance().registerFile(m_file->GetName(), *m_fileMetaData);
418 m_outputFileMetaData = *m_fileMetaData;
// Definition header of terminate().
// NOTE(review): the body of this function is not part of this view.
422 void RootOutputModule::terminate()
// Finish the current output file: fill the persistent tree, write and delete the
// trees, report the file to the metadata service and reset per-file state.
// NOTE(review): several statements and all brace-only lines are not part of this view.
427 void RootOutputModule::closeFile()
// Keep a copy of the current FileMetaData (if valid) so that it can be restored
// after the persistent tree has been filled.
431 std::unique_ptr<FileMetaData> old;
432 if (m_fileMetaData) old = std::make_unique<FileMetaData>(*m_fileMetaData);
437 fillTree(DataStore::c_Persistent);
440 if (old) *m_fileMetaData = *old;
// Remember the current ROOT directory; the code using dir later is not visible here.
444 TDirectory* dir = gDirectory;
// Write every existing tree under its name with kWriteDelete, delete the tree
// object and clear the pointer.
446 for (
int durability = 0; durability < DataStore::c_NDurabilityTypes; ++durability) {
447 if (m_tree[durability]) {
448 B2DEBUG(30,
"Write TTree " <<
c_treeNames[durability]);
449 m_tree[durability]->Write(
c_treeNames[durability].c_str(), TObject::kWriteDelete);
450 delete m_tree[durability];
452 m_tree[durability] =
nullptr;
// The file name is copied into a local string before it is logged and reported.
456 const std::string filename = m_file->GetName();
457 if (m_outputSplitSize) {
458 B2INFO(getName() <<
": Finished writing file." <<
LogVar(
"filename", filename));
// Report the finished file together with the stored copy of its metadata.
464 MetadataService::Instance().addRootOutputFile(filename, &m_outputFileMetaData);
// Reset per-file bookkeeping; the loop body and further resets are not visible here.
467 for (
auto & entry : m_entries) {
470 m_parentLfns.clear();
472 m_experimentHigh = 0;
// Fill one entry of the tree for the given durability and check for write errors.
// NOTE(review): the function header and some control-flow lines (for example the
// else between the two SetBranchAddress calls and the condition guarding the
// fatal error) are not part of this view.
// Nothing to do when no tree exists for this durability.
484 if (!m_tree[durability])
return;
// Refresh the branch addresses before filling: entries whose ptr is null get a
// null branch address, the other call points the branch at the entry object.
487 for (
unsigned int i = 0; i < m_entries[durability].size(); i++) {
488 if (!m_entries[durability][i]->ptr) {
490 m_tree[durability]->SetBranchAddress(m_entries[durability][i]->name.c_str(),
nullptr);
492 m_tree[durability]->SetBranchAddress(m_entries[durability][i]->name.c_str(), &m_entries[durability][i]->object);
495 m_tree[durability]->Fill();
// The write-error bit of the file is read after filling so that a failed write
// ends in the fatal error below.
497 const bool writeError = m_file->TestBit(TFile::kWriteError);
501 B2FATAL(
"A write error occurred while saving '" << m_file->GetName() <<
"', please check if enough disk space is available.");