9 #include <boost/python.hpp>
11 #include <framework/modules/rootio/RootOutputModule.h>
13 #include <framework/io/RootIOUtilities.h>
14 #include <framework/core/FileCatalog.h>
15 #include <framework/core/MetadataService.h>
16 #include <framework/core/RandomNumbers.h>
17 #include <framework/database/Database.h>
19 #include <framework/core/ModuleParam.templateDetails.h>
20 #include <framework/utilities/EnvironmentVariables.h>
22 #include <boost/filesystem/path.hpp>
23 #include <boost/filesystem/operations.hpp>
24 #include <boost/format.hpp>
25 #include <boost/algorithm/string.hpp>
27 #include <TClonesArray.h>
29 #include <nlohmann/json.hpp>
37 using namespace RootIOUtilities;
49 m_eventLow(0), m_experimentHigh(0), m_runHigh(0), m_eventHigh(0)
52 setDescription(
"Writes DataStore objects into a .root file. Data is stored in a TTree 'tree' for event-dependent and in 'persistent' for peristent data. You can use RootInput to read the files back into basf2.");
53 setPropertyFlags(c_Output);
56 addParam(
"outputFileName" , m_outputFileName,
"Name of the output file. Can be overridden using the -o argument to basf2.",
57 string(
"RootOutput.root"));
58 addParam(
"ignoreCommandLineOverride" , m_ignoreCommandLineOverride,
59 "Ignore override of file name via command line argument -o. Useful if you have multiple output modules in one path.",
false);
60 addParam(
"compressionLevel", m_compressionLevel,
61 "0 for no, 1 for low, 9 for high compression. Level 1 usually reduces size by >50%, higher levels have no noticeable effect. On typical hard disks, disabling compression reduces write time by 10-20 %, but almost doubles read time, so you probably should leave this turned on.",
63 addParam(
"compressionAlgorithm", m_compressionAlgorithm,
64 "Set the Compression algorithm. Recommended values are 0 for default, 1 for zlib and 4 for lz4\n\n"
65 ".. versionadded:: release-03-00-00" , m_compressionAlgorithm);
66 addParam(
"splitLevel", m_splitLevel,
67 "Branch split level: determines up to which depth object members will be saved in separate sub-branches in the tree. For arrays or objects with custom streamers, -1 is used instead to ensure the streamers are used. The default (99) usually gives the highest read performance with RootInput.",
69 addParam(
"updateFileCatalog", m_updateFileCatalog, R
"DOC(
70 Flag that specifies whether the file metadata catalog is updated or created.
71 This is only necessary in special cases and can always be done afterwards using
72 ``b2file-catalog-add filename.root``"
74 (You can also set the ``BELLE2_FILECATALOG`` environment variable to NONE to get
75 the same effect as setting this to false))DOC", false);
77 vector<string> emptyvector;
78 addParam(c_SteerBranchNames[0], m_branchNames[0],
79 "Names of event durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (EventMetaData is always saved)",
81 addParam(c_SteerBranchNames[1], m_branchNames[1],
82 "Names of persistent durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (FileMetaData is always saved)",
84 addParam(c_SteerAdditionalBranchNames[0], m_additionalBranchNames[0],
85 "Add additional event branch names without the need to specify all branchnames.",
87 addParam(c_SteerAdditionalBranchNames[1], m_additionalBranchNames[1],
88 "Add additional persistent branch names without the need to specify all branchnames.",
90 addParam(c_SteerExcludeBranchNames[0], m_excludeBranchNames[0],
91 "Names of event durability branches NOT to be saved. Branches also in branchNames are not saved.", emptyvector);
92 addParam(c_SteerExcludeBranchNames[1], m_excludeBranchNames[1],
93 "Names of persistent durability branches NOT to be saved. Branches also in branchNamesPersistent are not saved.", emptyvector);
94 addParam(
"autoFlushSize", m_autoflush,
95 "Value for TTree SetAutoFlush(): a positive value tells ROOT to flush all baskets to disk after n entries, a negative value to flush after -n bytes",
97 addParam(
"autoSaveSize", m_autosave,
98 "Value for TTree SetAutoSave(): a positive value tells ROOT to write the TTree metadata after n entries, a negative value to write the metadata after -n bytes",
100 addParam(
"basketSize", m_basketsize,
"Basketsize for Branches in the Tree in bytes", 32000);
101 addParam(
"additionalDataDescription", m_additionalDataDescription,
"Additional dictionary of "
102 "name->value pairs to be added to the file metadata to describe the data",
103 m_additionalDataDescription);
104 addParam(
"buildIndex", m_buildIndex,
"Build Event Index for faster finding of events by exp/run/event number", m_buildIndex);
105 addParam(
"keepParents", m_keepParents,
"Keep parents files of input files, input files will not be added as output file's parents",
107 addParam(
"outputSplitSize", m_outputSplitSize, R
"DOC(
108 If given split the output file once the file has reached the given size in MB.
109 If set the filename will end in ``.f{index:05d}.root``. So if for example
110 ``outputFileName`` is set to "RootOutput.root" then the files will be named
111 ``RootOutput.f00000.root``, ``RootOutput.f00001.root``,
112 ``RootOutput.f00002.root``, ...
114 All created output files are complete and independent files and can
115 subsequently processed completely independent.
118 The output files will be approximately of the size given by
119 ``outputSplitSize`` but they will be slightly larger since
120 additional information has to be written at the end of the file. If necessary
121 please account for this. Also, using ``buildIndex=False`` might be beneficial
122 to reduce the overshoot.
125 This will set the amount of generated events stored in the file metadata to
126 zero as it is not possible to determine which fraction ends up in which
129 .. versionadded:: release-03-00-00
130 )DOC", m_outputSplitSize);
// Destructor: compiler-generated (defaulted); no custom cleanup is performed here.
134 RootOutputModule::~RootOutputModule() = default;
// Module initialisation: configures ROOT's tree size limit, declares the
// metadata objects used by this module, validates/converts the optional
// outputSplitSize parameter and inspects the output file name for a
// "protocol://" prefix.
// NOTE(review): this listing omits several original lines (braces and at least
// one condition), so the nesting of the statements below is not fully visible.
136 void RootOutputModule::initialize()
// Set ROOT's global maximum tree size to 1000 * 1000 * 1e11 = 1e17.
// NOTE(review): presumably so ROOT never switches output files on its own - confirm.
140 TTree::SetMaxTreeSize(1000 * 1000 * 100000000000LL);
// This module registers FileMetaData itself and requires EventMetaData to exist.
143 m_fileMetaData.registerInDataStore();
145 m_eventMetaData.isRequired();
// outputSplitSize is optional; only validate and convert it when it was given.
148 if (m_outputSplitSize) {
149 if (*m_outputSplitSize == 0) B2ERROR(
"outputSplitSize must be set to a positive value");
// A value of 1024*1024 (MB) or more is suspicious: the user may have passed bytes.
151 if (*m_outputSplitSize >= 1024*1024) B2WARNING(
"outputSplitSize set to " << *m_outputSplitSize <<
" MB, please make sure the units are correct");
// Convert the user-supplied value from MB to bytes (in place).
153 *m_outputSplitSize *= 1024 * 1024;
// Detect a leading "<letters>://" protocol prefix in the output file name.
159 std::regex protocol(
"^([A-Za-z]*)://");
160 if(std::smatch m; std::regex_search(m_outputFileName, m, protocol)) {
// Strip the protocol prefix from the file name.
// NOTE(review): the condition selecting between this statement and the
// m_regularFile assignment below is not visible in this listing.
163 m_outputFileName = std::regex_replace(m_outputFileName, protocol,
"");
// Mark the output as not being a regular (local) file.
166 m_regularFile =
false;
// Opens the output TFile (creating missing directories for regular files),
// applies the compression settings and creates one TTree branch per DataStore
// entry selected for writing, for every durability type.
// NOTE(review): this listing omits several original lines (closing braces,
// the definition of `map`, tree creation, and the bodies of some `if`s), so
// comments below describe only what the visible statements do.
172 void RootOutputModule::openFile()
// Remember the current ROOT directory.
// NOTE(review): presumably restored at the end of the function - the restoring line is not visible.
174 TDirectory* dir = gDirectory;
175 boost::filesystem::path out{m_outputFileName};
// When splitting by size, replace the extension with "f<5-digit index>.root"
// built from m_fileIndex, going through TUrl so URL parts/options are kept.
176 if (m_outputSplitSize) {
182 TUrl fileUrl(m_outputFileName.c_str(), m_regularFile);
183 boost::filesystem::path file{fileUrl.GetFile()};
184 file.replace_extension((boost::format(
"f%05d.root") % m_fileIndex).str());
185 fileUrl.SetFile(file.c_str());
// Regular files use "file + options", everything else the full URL.
187 out = m_regularFile? fileUrl.GetFileAndOptions() : fileUrl.GetUrl();
// First attempt to create the file (overwriting an existing one).
189 m_file = TFile::Open(out.c_str(),
"RECREATE",
"basf2 Event File");
// Opening failed for a regular file: try to create the parent directory.
190 if ((!m_file || m_file->IsZombie()) && m_regularFile) {
192 auto dirpath = out.parent_path();
// Only log when a directory was actually created.
194 if (boost::filesystem::create_directories(dirpath)) {
195 B2INFO(
"Created missing directory " << dirpath <<
".");
// Second attempt at opening the file.
197 m_file = TFile::Open(out.c_str(),
"RECREATE",
"basf2 Event File");
// Still no usable file: abort.
201 if (!m_file || m_file->IsZombie()) {
202 B2FATAL(
"Couldn't open file " << out <<
" for writing!");
// Apply the user-configured compression settings to the file.
204 m_file->SetCompressionAlgorithm(m_compressionAlgorithm);
205 m_file->SetCompressionLevel(m_compressionLevel);
// Set up the tree and its branches for each durability type.
207 for (
int durability = 0; durability < DataStore::c_NDurabilityTypes; durability++) {
// Collect all entry names of `map` for this durability ...
// NOTE(review): the declaration of `map` is not visible in this listing.
209 set<string> branchList;
210 for (
const auto& pair : map)
211 branchList.insert(pair.first);
// ... and reduce them according to the include/exclude parameters.
213 branchList =
filterBranches(branchList, m_branchNames[durability], m_excludeBranchNames[durability], durability);
// Forward the autoFlushSize / autoSaveSize parameters to the tree.
// NOTE(review): the line creating m_tree[durability] is not visible here.
217 m_tree[durability]->SetAutoFlush(m_autoflush);
218 m_tree[durability]->SetAutoSave(m_autosave);
219 for (
auto & iter : map) {
220 const std::string& branchName = iter.first;
// Entry is flagged dontWriteOut and is listed neither in branchNames nor in
// additionalBranchNames.
// NOTE(review): the action taken is not visible (presumably the entry is skipped).
222 if (iter.second.dontWriteOut
223 && find(m_branchNames[durability].begin(), m_branchNames[durability].end(), branchName) == m_branchNames[durability].end()
224 && find(m_additionalBranchNames[durability].begin(), m_additionalBranchNames[durability].end(),
225 branchName) == m_additionalBranchNames[durability].end())
// Entry was removed by filterBranches().
228 if (branchList.count(branchName) == 0) {
// The inner condition is false only for "FileMetaData" outside event
// durability and for "EventMetaData" outside persistent durability.
// NOTE(review): the guarded action is not visible (presumably skip the entry,
// so that those two metadata branches are always kept).
230 if (((branchName !=
"FileMetaData") || (durability == DataStore::c_Event)) &&
231 ((branchName !=
"EventMetaData") || (durability == DataStore::c_Persistent))) {
// Warn once (first file only) about persistent branches other than
// FileMetaData/ProcessStatistics when output splitting is enabled.
237 if(durability == DataStore::c_Persistent and m_outputSplitSize and m_fileIndex==0 and
238 (branchName !=
"FileMetaData" and branchName !=
"ProcessStatistics")) {
239 B2WARNING(
"Persistent branches might not be stored as expected when splitting the output by size" <<
LogVar(
"branch", branchName));
242 TClass* entryClass = iter.second.objClass;
// Classes without a ROOT dictionary cannot be written; warn on the first file only.
251 if (!entryClass->HasDictionary()) {
252 if (m_fileIndex == 0) {
253 B2WARNING(
"No dictionary found, object will not be saved (This is probably an obsolete class that is still present in the input file.)"
254 <<
LogVar(
"class", entryClass->GetName()) <<
LogVar(
"branch", branchName));
// NOTE(review): the condition guarding this error is not visible in this listing.
260 B2ERROR(
"The version number in the ClassDef() macro must be at least 1 to enable I/O!" <<
LogVar(
"class", entryClass->GetName()));
// Start from the configured split level.
// NOTE(review): per the debug message it is set to -1 for classes with a
// custom streamer; the check and the assignment are not visible here.
263 int splitLevel = m_splitLevel;
265 B2DEBUG(38,
"Class has custom streamer, setting split level -1 for this branch." <<
LogVar(
"class", entryClass->GetName()));
// For arrays, call BypassStreamer(kFALSE) on the underlying TClonesArray.
268 if (iter.second.isArray) {
270 static_cast<TClonesArray*
>(iter.second.object)->BypassStreamer(kFALSE);
// Create the branch and remember the entry so it can be handled when filling.
273 m_tree[durability]->Branch(branchName.c_str(), &iter.second.object, m_basketsize, splitLevel);
274 m_entries[durability].push_back(&iter.second);
275 B2DEBUG(39,
"The branch " << branchName <<
" was created.");
// First-file-only action; the call itself is cut off in this listing.
278 if (m_fileIndex == 0) {
280 iter.second.isArray));
// When splitting, report every (new) file that gets opened.
286 if (m_outputSplitSize) {
287 B2INFO(getName() <<
": Opened " << (m_fileIndex > 0 ?
"new " :
"") <<
"file for writing" <<
LogVar(
"filename", out));
// Per-event processing: opens the output file on first use, writes the event
// tree entry, collects parent LFNs and tracks the lowest/highest
// experiment/run/event seen, and checks the size limit when splitting.
// NOTE(review): this listing omits several original lines (braces, else
// branches, some assignments), so the exact nesting is not fully visible.
292 void RootOutputModule::event()
// Open the output file lazily.
295 if (!m_file) openFile();
// Not keeping parents: tag the event with the LFN of the current FileMetaData.
297 if (!m_keepParents) {
298 if (m_fileMetaData) {
299 m_eventMetaData->setParentLfn(m_fileMetaData->getLfn());
// Write the event-durability entries to the tree.
304 fillTree(DataStore::c_Event);
// Collect parent LFNs; an LFN is appended only if it is non-empty and differs
// from the last one stored (avoids consecutive duplicates).
// NOTE(review): two collection variants follow (parents of the input file vs.
// the input file's own LFN); the condition choosing between them is not
// visible here (presumably m_keepParents).
306 if (m_fileMetaData) {
308 for (
int iparent = 0; iparent < m_fileMetaData->getNParents(); iparent++) {
309 string lfn = m_fileMetaData->getParent(iparent);
310 if (!lfn.empty() && (m_parentLfns.empty() || (m_parentLfns.back() != lfn))) {
311 m_parentLfns.push_back(lfn);
315 string lfn = m_fileMetaData->getLfn();
316 if (!lfn.empty() && (m_parentLfns.empty() || (m_parentLfns.back() != lfn))) {
317 m_parentLfns.push_back(lfn);
// Update the low/high (experiment, run, event) range covered by this file.
323 unsigned long experiment = m_eventMetaData->getExperiment();
324 unsigned long run = m_eventMetaData->getRun();
325 unsigned long event = m_eventMetaData->getEvent();
// low > high marks "range not set yet": initialise both ends with this event.
326 if (m_experimentLow > m_experimentHigh) {
327 m_experimentLow = m_experimentHigh = experiment;
328 m_runLow = m_runHigh = run;
329 m_eventLow = m_eventHigh = event;
// Lexicographic comparison on (experiment, run, event): new lower bound.
// NOTE(review): only the experiment assignment is visible; the run/event
// assignments are presumably on omitted lines.
331 if ((experiment < m_experimentLow) || ((experiment == m_experimentLow) && ((run < m_runLow) || ((run == m_runLow)
332 && (event < m_eventLow))))) {
333 m_experimentLow = experiment;
// Same comparison for the upper bound.
337 if ((experiment > m_experimentHigh) || ((experiment == m_experimentHigh) && ((run > m_runHigh) || ((run == m_runHigh)
338 && (event > m_eventHigh))))) {
339 m_experimentHigh = experiment;
// When splitting: the current end-of-file position exceeds the limit (bytes).
// NOTE(review): the call that actually closes the file is not visible here.
346 if (m_outputSplitSize and (uint64_t)m_file->GetEND() > *m_outputSplitSize) {
348 B2INFO(getName() <<
": Output size limit reached, closing file ...");
// Fills the FileMetaData object for the file being written: event count and
// exp/run/event range, parents, creation data, seed, steering, MC event
// count, global tags, user data description and the LFN; optionally registers
// the file in the file catalog and keeps a copy in m_outputFileMetaData.
// NOTE(review): this listing omits several original lines (braces, else
// branches, some conditions), so the exact nesting is not fully visible.
353 void RootOutputModule::fillFileMetaData()
// Remember the MC flag of any existing metadata (defaults to MC) before the
// object is created again, then re-apply "real data" if needed.
// NOTE(review): create(true) presumably replaces an existing object - confirm.
355 bool isMC = (m_fileMetaData) ? m_fileMetaData->isMC() :
true;
356 m_fileMetaData.create(
true);
357 if (!isMC) m_fileMetaData->declareRealData();
359 if (m_tree[DataStore::c_Event]) {
361 TTree* tree = m_tree[DataStore::c_Event];
362 unsigned long numEntries = tree->GetEntries();
// Build the event index only if requested and there is something to index.
363 if (m_buildIndex && numEntries > 0) {
// Above 10 million entries the index is skipped (see warning text).
364 if (numEntries > 10000000) {
366 B2WARNING(
"Not building TTree index because of large number of events. The index object would conflict with ROOT limits on object size and cause problems.");
367 }
else if (tree->GetBranch(
"EventMetaData")) {
// Clear the branch address of EventMetaData before building the index.
368 tree->SetBranchAddress(
"EventMetaData",
nullptr);
369 RootIOUtilities::buildIndex(tree);
373 m_fileMetaData->setNEvents(numEntries);
// low > high means no event was recorded: store (-1, -1, 0) for both bounds.
374 if (m_experimentLow > m_experimentHigh) {
376 m_fileMetaData->setLow(-1, -1, 0);
377 m_fileMetaData->setHigh(-1, -1, 0);
// Otherwise store the tracked range.
379 m_fileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
380 m_fileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
// Provenance information.
385 m_fileMetaData->setParents(m_parentLfns);
386 RootIOUtilities::setCreationData(*m_fileMetaData);
387 m_fileMetaData->setRandomSeed(RandomNumbers::getSeed());
388 m_fileMetaData->setSteering(Environment::Instance().getSteering());
389 auto mcEvents = Environment::Instance().getNumberOfMCEvents();
// When splitting by size the MC event count cannot be attributed to a file;
// warn once (first file only).
// NOTE(review): the warning says the value is set to 0, but the line doing so
// is not visible in this listing.
390 if(m_outputSplitSize and mcEvents > 0) {
391 if(m_fileIndex == 0) B2WARNING(
"Number of MC Events cannot be saved when splitting output files by size, setting to 0");
394 m_fileMetaData->setMcEvents(mcEvents);
395 m_fileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
// Copy the user-supplied name->value pairs into the metadata.
396 for (
const auto& item : m_additionalDataDescription) {
397 m_fileMetaData->setDataDescription(item.first, item.second);
// The LFN starts out as the name of the opened file ...
400 std::string lfn = m_file->GetName();
// ... made absolute relative to the initial working directory.
// NOTE(review): a guarding condition may exist on an omitted line.
402 lfn = boost::filesystem::absolute(lfn, boost::filesystem::initial_path()).string();
// Optional custom LFN: if BELLE2_LFN_FORMATSTRING is set, let the Python
// function B2Tools.format.format_filename build it from the format string,
// the output file name and the metadata as JSON.
405 std::string format = EnvironmentVariables::get(
"BELLE2_LFN_FORMATSTRING",
"");
406 if (!format.empty()) {
407 auto format_filename = boost::python::import(
"B2Tools.format").attr(
"format_filename");
408 lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_fileMetaData->getJsonStr()));
410 m_fileMetaData->setLfn(lfn);
// Register the file in the file catalog only when requested.
412 if (m_updateFileCatalog) {
413 FileCatalog::Instance().registerFile(m_file->GetName(), *m_fileMetaData);
// Keep a copy of the final metadata for later use by this module.
415 m_outputFileMetaData = *m_fileMetaData;
419 void RootOutputModule::terminate()
// Finishes the current output file: writes the persistent tree, writes and
// deletes all trees, reports the file to the MetadataService and resets the
// per-file bookkeeping.
// NOTE(review): this listing omits several original lines (e.g. the call that
// fills the file metadata, the actual file close, loop bodies), so only the
// visible statements are described.
424 void RootOutputModule::closeFile()
// Back up the current FileMetaData (if any) so it can be restored below.
// NOTE(review): presumably because the metadata is overwritten for the output
// file on an omitted line in between - confirm.
428 std::unique_ptr<FileMetaData> old;
429 if (m_fileMetaData) old = std::make_unique<FileMetaData>(*m_fileMetaData);
// Write the persistent-durability entries.
434 fillTree(DataStore::c_Persistent);
// Restore the previous FileMetaData content.
437 if (old) *m_fileMetaData = *old;
// Remember the current ROOT directory.
// NOTE(review): the lines using `dir` are not visible in this listing.
441 TDirectory* dir = gDirectory;
// Write every existing tree under its standard name (replacing older cycles
// via kWriteDelete), then delete it and clear the pointer.
443 for (
int durability = 0; durability < DataStore::c_NDurabilityTypes; ++durability) {
444 if (m_tree[durability]) {
445 B2DEBUG(30,
"Write TTree " <<
c_treeNames[durability]);
446 m_tree[durability]->Write(
c_treeNames[durability].c_str(), TObject::kWriteDelete);
447 delete m_tree[durability];
449 m_tree[durability] =
nullptr;
// Copy the file name into a local string for the log and the metadata service.
453 const std::string filename = m_file->GetName();
454 if (m_outputSplitSize) {
455 B2INFO(getName() <<
": Finished writing file." <<
LogVar(
"filename", filename));
// Report the finished file together with its metadata.
461 MetadataService::Instance().addRootOutputFile(filename, &m_outputFileMetaData);
// Reset per-file state.
// NOTE(review): the body of this loop and further resets are on omitted lines.
464 for (
auto & entry : m_entries) {
467 m_parentLfns.clear();
469 m_experimentHigh = 0;
481 if (!m_tree[durability])
return;
483 TTree& tree = *m_tree[durability];
484 for(
auto* entry: m_entries[durability]) {
488 entry->object->SetBit(kInvalidObject);
491 tree.SetBranchAddress(entry->name.c_str(), &entry->object);
494 for (
auto* entry: m_entries[durability]) {
495 entry->object->ResetBit(kInvalidObject);
498 const bool writeError = m_file->TestBit(TFile::kWriteError);
501 const std::string filename = m_file->GetName();
503 B2FATAL(
"A write error occured while saving '" << filename <<
"', please check if enough disk space is available.");
EDurability
Durability types.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
Write objects from DataStore into a ROOT file.
Base class for StoreObjPtr and StoreArray for easier common treatment.
Class to store variables with their name which were sent to the logging service.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
bool hasCustomStreamer(const TClass *cl)
Returns true if and only if 'cl' has a user-defined streamer.
const std::string c_treeNames[]
Names of trees.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
bool hasStreamer(const TClass *cl)
Returns true if and only if 'cl' or one of its bases has I/O streamers.
Abstract base class for different kinds of events.