9 #include <boost/python.hpp> 
   11 #include <framework/modules/rootio/RootOutputModule.h> 
   13 #include <framework/io/RootIOUtilities.h> 
   14 #include <framework/core/FileCatalog.h> 
   15 #include <framework/core/MetadataService.h> 
   16 #include <framework/core/RandomNumbers.h> 
   17 #include <framework/database/Database.h> 
   19 #include <framework/core/ModuleParam.templateDetails.h> 
   20 #include <framework/utilities/EnvironmentVariables.h> 
   22 #include <boost/filesystem/path.hpp> 
   23 #include <boost/filesystem/operations.hpp> 
   24 #include <boost/format.hpp> 
   25 #include <boost/algorithm/string.hpp> 
   27 #include <TClonesArray.h> 
   29 #include <nlohmann/json.hpp> 
   37 using namespace RootIOUtilities;
 
   48 RootOutputModule::RootOutputModule() : 
Module(), m_file(nullptr), m_tree{0}, m_experimentLow(1), m_runLow(0),
 
   49   m_eventLow(0), m_experimentHigh(0), m_runHigh(0), m_eventHigh(0)
 
   52   setDescription(
"Writes DataStore objects into a .root file. Data is stored in a TTree 'tree' for event-dependent and in 'persistent' for peristent data. You can use RootInput to read the files back into basf2.");
 
   56   addParam(
"outputFileName", 
m_outputFileName, 
"Name of the output file. Can be overridden using the -o argument to basf2.",
 
   57            string(
"RootOutput.root"));
 
   59            "Ignore override of file name via command line argument -o. Useful if you have multiple output modules in one path.", 
false);
 
   61            "0 for no, 1 for low, 9 for high compression. Level 1 usually reduces size by >50%, higher levels have no noticeable effect. On typical hard disks, disabling compression reduces write time by 10-20 %, but almost doubles read time, so you probably should leave this turned on.",
 
   64            "Set the Compression algorithm. Recommended values are 0 for default, 1 for zlib and 4 for lz4\n\n" 
   67            "Branch split level: determines up to which depth object members will be saved in separate sub-branches in the tree. For arrays or objects with custom streamers, -1 is used instead to ensure the streamers are used. The default (99) usually gives the highest read performance with RootInput.",
 
   70 Flag that specifies whether the file metadata catalog is updated or created. 
   71 This is only necessary in special cases and can always be done afterwards using 
   72 ``b2file-catalog-add filename.root``" 
   74 (You can also set the ``BELLE2_FILECATALOG`` environment variable to NONE to get 
   75 the same effect as setting this to false))DOC", false);
 
   77   vector<string> emptyvector;
 
   79            "Names of event durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (EventMetaData is always saved)",
 
   82            "Names of persistent durability branches to be saved. Empty means all branches. Objects with c_DontWriteOut flag added here will also be saved. (FileMetaData is always saved)",
 
   85            "Add additional event branch names without the need to specify all branchnames.",
 
   88            "Add additional persistent branch names without the need to specify all branchnames.",
 
   91            "Names of event durability branches NOT to be saved. Branches also in branchNames are not saved.", emptyvector);
 
   93            "Names of persistent durability branches NOT to be saved. Branches also in branchNamesPersistent are not saved.", emptyvector);
 
   95            "Value for TTree SetAutoFlush(): a positive value tells ROOT to flush all baskets to disk after n entries, a negative value to flush after -n bytes",
 
   98            "Value for TTree SetAutoSave(): a positive value tells ROOT to write the TTree metadata after n entries, a negative value to write the metadata after -n bytes",
 
  102            "name->value pairs to be added to the file metadata to describe the data",
 
  105   addParam(
"keepParents", 
m_keepParents, 
"Keep parents files of input files, input files will not be added as output file's parents",
 
  108 If given split the output file once the file has reached the given size in MB. 
  109 If set the filename will end in ``.f{index:05d}.root``. So if for example 
  110 ``outputFileName`` is set to "RootOutput.root" then the files will be named 
  111 ``RootOutput.f00000.root``, ``RootOutput.f00001.root``, 
  112 ``RootOutput.f00002.root``, ... 
  114 All created output files are complete and independent files and can 
  115 subsequently processed completely independent. 
  118   The output files will be approximately of the size given by 
  119   ``outputSplitSize`` but they will be slightly larger since 
  120   additional information has to be written at the end of the file. If necessary 
  121   please account for this. Also, using ``buildIndex=False`` might be beneficial 
  122   to reduce the overshoot. 
  125   This will set the amount of generated events stored in the file metadata to 
  126   zero as it is not possible to determine which fraction ends up in which 
  129 .. versionadded:: release-03-00-00 
  140   TTree::SetMaxTreeSize(1000 * 1000 * 100000000000LL);
 
  149     if (*
m_outputSplitSize == 0) B2ERROR(
"outputSplitSize must be set to a positive value");
 
  159   std::regex protocol(
"^([A-Za-z]*)://");
 
  174   TDirectory* dir = gDirectory;
 
  183     boost::filesystem::path file{fileUrl.GetFile()};
 
  184     file.replace_extension((boost::format(
"f%05d.root") % 
m_fileIndex).str());
 
  185     fileUrl.SetFile(file.c_str());
 
  187     out = 
m_regularFile? fileUrl.GetFileAndOptions() : fileUrl.GetUrl();
 
  189   m_file = TFile::Open(out.c_str(), 
"RECREATE", 
"basf2 Event File");
 
  192     auto dirpath = out.parent_path();
 
  194     if (boost::filesystem::create_directories(dirpath)) {
 
  195       B2INFO(
"Created missing directory " << dirpath << 
".");
 
  197       m_file = TFile::Open(out.c_str(), 
"RECREATE", 
"basf2 Event File");
 
  202     B2FATAL(
"Couldn't open file " << out << 
" for writing!");
 
  209     set<string> branchList;
 
  210     for (
const auto& pair : map)
 
  211       branchList.insert(pair.first);
 
  219     for (
auto & iter : map) {
 
  220       const std::string& branchName = iter.first;
 
  222       if (iter.second.dontWriteOut
 
  228       if (branchList.count(branchName) == 0) {
 
  238          (branchName != 
"FileMetaData" and branchName != 
"ProcessStatistics")) {
 
  239         B2WARNING(
"Persistent branches might not be stored as expected when splitting the output by size" << 
LogVar(
"branch", branchName));
 
  242       TClass* entryClass = iter.second.objClass;
 
  251       if (!entryClass->HasDictionary()) {
 
  253           B2WARNING(
"No dictionary found, object will not be saved  (This is probably an obsolete class that is still present in the input file.)" 
  254                     << 
LogVar(
"class", entryClass->GetName()) << 
LogVar(
"branch", branchName));
 
  260         B2ERROR(
"The version number in the ClassDef() macro must be at least 1 to enable I/O!" << 
LogVar(
"class", entryClass->GetName()));
 
  265         B2DEBUG(38, 
"Class has custom streamer, setting split level -1 for this branch." << 
LogVar(
"class", entryClass->GetName()));
 
  268         if (iter.second.isArray) {
 
  270           static_cast<TClonesArray*
>(iter.second.object)->BypassStreamer(kFALSE);
 
  273       m_tree[durability]->Branch(branchName.c_str(), &iter.second.object, 
m_basketsize, splitLevel);
 
  274       m_entries[durability].push_back(&iter.second);
 
  275       B2DEBUG(39, 
"The branch " << branchName << 
" was created.");
 
  280                                                               iter.second.isArray));
 
  287     B2INFO(
getName() << 
": Opened " << (
m_fileIndex > 0 ? 
"new " : 
"") << 
"file for writing" << 
LogVar(
"filename", out));
 
  308       for (
int iparent = 0; iparent < 
m_fileMetaData->getNParents(); iparent++) {
 
  348     B2INFO(
getName() << 
": Output size limit reached, closing file ...");
 
  362     unsigned long numEntries = tree->GetEntries();
 
  364       if (numEntries > 10000000) {
 
  366         B2WARNING(
"Not building TTree index because of large number of events. The index object would conflict with ROOT limits on object size and cause problems.");
 
  367       } 
else if (tree->GetBranch(
"EventMetaData")) {
 
  368         tree->SetBranchAddress(
"EventMetaData", 
nullptr);
 
  391     if(
m_fileIndex == 0) B2WARNING(
"Number of MC Events cannot be saved when splitting output files by size, setting to 0");
 
  400   std::string lfn = 
m_file->GetName();
 
  402     lfn = boost::filesystem::absolute(lfn, boost::filesystem::initial_path()).string();
 
  406   if (!format.empty()) {
 
  407     auto format_filename = boost::python::import(
"B2Tools.format").attr(
"format_filename");
 
  428   std::unique_ptr<FileMetaData> old;
 
  441   TDirectory* dir = gDirectory;
 
  445       B2DEBUG(30, 
"Write TTree " << 
c_treeNames[durability]);
 
  446       m_tree[durability]->Write(
c_treeNames[durability].c_str(), TObject::kWriteDelete);
 
  447       delete m_tree[durability];
 
  449     m_tree[durability] = 
nullptr;
 
  453   const std::string filename = 
m_file->GetName();
 
  455     B2INFO(
getName() << 
": Finished writing file." << 
LogVar(
"filename", filename));
 
  481   if (!
m_tree[durability]) 
return;
 
  483   TTree& tree = *
m_tree[durability];
 
  484   for(
auto* entry: 
m_entries[durability]) {
 
  488       entry->object->SetBit(kInvalidObject);
 
  491     tree.SetBranchAddress(entry->name.c_str(), &entry->object);
 
  494   for (
auto* entry: 
m_entries[durability]) {
 
  495     entry->object->ResetBit(kInvalidObject);
 
  498   const bool writeError = 
m_file->TestBit(TFile::kWriteError);
 
  501     const std::string filename = 
m_file->GetName();
 
  503     B2FATAL(
"A write error occured while saving '" << filename << 
"', please check if enough disk space is available.");
 
StoreEntryMap & getStoreEntryMap(EDurability durability)
Get a reference to the object/array map.
static const int c_NDurabilityTypes
Number of Durability Types.
EDurability
Durability types.
@ c_Persistent
Object is available during entire execution time.
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
static DataStore & Instance()
Instance of singleton Store.
bool optionalInput(const StoreAccessorBase &accessor)
Register the given object/array as an optional input.
std::map< std::string, StoreEntry > StoreEntryMap
Map for StoreEntries.
unsigned int getNumberOfMCEvents() const
Number of generated events (from EventInfoSetter).
static Environment & Instance()
Static method to get a reference to the Environment instance.
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
void setDescription(const std::string &description)
Sets the description of the module.
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
@ c_Output
This module is an output module (writes data).
const std::string & getName() const
Returns the name of the module.
static std::string getSeed()
Get the random number generator seed.
unsigned long m_experimentLow
Lowest experiment number.
std::vector< DataStore::StoreEntry * > m_entries[DataStore::c_NDurabilityTypes]
Vector of DataStore entries that are written to the output.
unsigned long m_experimentHigh
Highest experiment number.
unsigned long m_eventLow
Lowest event number in lowest run.
void fillFileMetaData()
Create and fill FileMetaData object.
int m_autosave
Number of entries (if >0) or number of bytes (if <0) after which to write the tree metadata to disk.
bool m_regularFile
Whether this is a regular, local file where we can actually create directories.
unsigned long m_runLow
Lowest run number.
int m_compressionAlgorithm
TFile compression algorithm.
virtual void event() override
Write data in c_Event DataStore maps.
bool m_buildIndex
Whether or not we want to build an event index.
bool m_keepParents
Whether to keep the parents the same as those of the input file.
virtual void terminate() override
Write data in the c_Persistent DataStore maps.
FileMetaData m_outputFileMetaData
File meta data finally stored in the output file.
TTree * m_tree[DataStore::c_NDurabilityTypes]
TTree for output.
unsigned long m_runHigh
Highest run number.
StoreObjPtr< EventMetaData > m_eventMetaData
Pointer to the event meta data.
std::vector< std::string > m_excludeBranchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should NOT be written out.
int m_basketsize
basket size for each branch in the file in bytes
std::vector< std::string > m_additionalBranchNames[DataStore::c_NDurabilityTypes]
Array of names of branches that should be written out although they are not flagged for writeout.
virtual void initialize() override
Setting up of various stuff.
TFile * m_file
TFile for output.
int m_fileIndex
Keep track of the file index: if we split files then we add '.f{fileIndex:05d}' in front of the ROOT ...
void fillTree(DataStore::EDurability durability)
Fill TTree.
bool m_ignoreCommandLineOverride
Ignore filename override from command line.
void closeFile()
Finalize the output file.
std::optional< uint64_t > m_outputSplitSize
Maximum output file size in MB.
int m_compressionLevel
TFile compression level.
int m_autoflush
Number of entries (if >0) or number of bytes (if <0) after which to flush all baskets to disk.
bool m_updateFileCatalog
Flag to enable or disable the update of the metadata catalog.
int m_splitLevel
Branch split level.
void openFile()
Open the next output file.
unsigned long m_eventHigh
Highest event number in highest run.
virtual ~RootOutputModule()
Destructor.
std::map< std::string, std::string > m_additionalDataDescription
Map of additional metadata to be added to the output file.
virtual std::vector< std::string > getFileNames(bool outputFiles=true) override
Set the used output file, taking into account -o argument to basf2.
std::vector< std::string > m_parentLfns
Vector of parent file LFNs.
std::vector< std::string > m_branchNames[DataStore::c_NDurabilityTypes]
Array for names of branches that should be written out.
StoreObjPtr< FileMetaData > m_fileMetaData
Pointer to the file meta data.
std::string m_outputFileName
Name for output file.
Base class for StoreObjPtr and StoreArray for easier common treatment.
Class to store variables with their name which were sent to the logging service.
static std::string get(const std::string &name, const std::string &fallback="")
Get the value of an environment variable or the given fallback value if the variable is not set.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
static Database & Instance()
Instance of a singleton Database.
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
bool hasCustomStreamer(const TClass *cl)
Returns true if and only if 'cl' has a user-defined streamer.
const std::string c_treeNames[]
Names of trees.
const std::string c_SteerExcludeBranchNames[]
Steering parameter names for m_excludeBranchNames.
const std::string c_SteerBranchNames[]
Steering parameter names for m_branchNames.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
std::set< std::string > filterBranches(const std::set< std::string > &branchesToFilter, const std::vector< std::string > &branches, const std::vector< std::string > &excludeBranches, int durability, bool quiet=false)
Given a list of input branches and lists of branches to include/exclude, returns a list of branches t...
const std::string c_SteerAdditionalBranchNames[]
Steering parameter names for m_additionalBranchNames.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
bool hasStreamer(const TClass *cl)
Returns true if and only if 'cl' or one of its bases has I/O streamers.
Abstract base class for different kinds of events.