8 #include <framework/dataobjects/FileMetaData.h> 
    9 #include <framework/io/RootIOUtilities.h> 
   10 #include <framework/io/RootFileInfo.h> 
   11 #include <framework/logging/Logger.h> 
   12 #include <framework/pcore/Mergeable.h> 
   13 #include <framework/core/FileCatalog.h> 
   14 #include <framework/utilities/KeyValuePrinter.h> 
   16 #include <boost/program_options.hpp> 
   17 #include <boost/algorithm/string.hpp> 
   21 #include <TBranchElement.h> 
   31 namespace po = boost::program_options;
 
   32 namespace fs = std::filesystem;
 
   36 using EventInfo = std::tuple<int, int, unsigned int>;
 
   /**
    * Strip the legacy IP globaltag name from a globaltag string.
    *
    * Every occurrence of "Legacy_IP_Information" is removed, together with a
    * directly preceding comma if there is one. Everything else in the string
    * is returned unchanged.
    *
    * @param globaltags globaltag string to clean up
    * @return copy of @p globaltags without the legacy entry
    */
   std::string removeLegacyGt(const std::string& globaltags)
   {
     // Building a std::regex is comparatively expensive and the pattern never
     // changes, so construct it only once on the first call.
     static const std::regex legacy_gt(",?Legacy_IP_Information");
     // regex_replace substitutes all matches, not just the first one.
     return std::regex_replace(globaltags, legacy_gt, "");
   }
 
   48 int main(
int argc, 
char* argv[])
 
   51   std::string outputfilename;
 
   52   std::vector<std::string> inputfilenames;
 
   53   po::options_description options(
"Options");
 
   55   (
"help,h", 
"print all available options")
 
   56   (
"output,o", po::value<std::string>(&outputfilename), 
"output file name")
 
   57   (
"file", po::value<std::vector<std::string>>(&inputfilenames), 
"filename to merge")
 
   58   (
"force,f", 
"overwrite existing file")
 
   59   (
"no-catalog", 
"don't register output file in file catalog, This is now the default")
 
   60   (
"add-to-catalog", 
"register the output file in the file catalog")
 
   61   (
"quiet,q", 
"if given don't print infos, just warnings and errors");
 
   62   po::positional_options_description positional;
 
   63   positional.add(
"output", 1);
 
   64   positional.add(
"file", -1);
 
   65   po::variables_map variables;
 
   66   po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
 
   67   po::notify(variables);
 
   68   if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
 
   69     std::cout << 
"Usage: " << argv[0] << 
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
 
   70     std::cout << 
"       " << argv[0] << 
" [<options>] [--file INPUTFILE...] " 
   71               << 
"-o OUTPUTFILE [--file INPUTFILE...]"  << std::endl << std::endl;
 
   72     std::cout << options << std::endl;
 
   74 This program is intended to merge files created by separate basf2 jobs. It's 
   75 similar to hadd but does correctly update the metadata in the file and merges 
   76 the objects in the persistent tree correctly. 
   78 The following restrictions apply: 
   79   - The files have to be created with the same release and steering file 
   80   - The persistent tree is only allowed to contain FileMetaData and objects 
   81     inheriting from Mergeable and the same list of objects needs to be present 
   83   - The event tree needs to contain the same DataStore entries in all files. 
   93   if(variables.count(
"quiet")>0){
 
   97   B2INFO(
"Merging files into " << std::quoted(outputfilename));
 
   99   if (fs::exists(outputfilename) && variables.count(
"force")==0) {
 
  100     B2ERROR(
"Output file exists, use -f to force overwriting it");
 
  108   std::set<std::string> allParents;
 
  111   std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
 
  113   std::set<std::string> allSeeds;
 
  115   std::set<std::string> allUsers;
 
  117   std::optional<EventInfo> lowEvt, highEvt;
 
  120   std::set<std::string> allEventBranches;
 
  122   std::string outputRelease;
 
  128   for (
const auto& input : inputfilenames) {
 
  132       const auto &fileMetaData = fileInfo.getFileMetaData();
 
  134       B2INFO(
"adding file " << std::quoted(input));
 
  137       auto branches = fileInfo.getBranchNames();
 
  138       if(branches.empty()) {
 
  139         throw std::runtime_error(
"Could not find any branches in event tree");
 
  141       if(allEventBranches.empty()) {
 
  142         std::swap(allEventBranches,branches);
 
  144         if(branches!=allEventBranches){
 
  145           B2ERROR(
"Branches in " << std::quoted(input) << 
" differ from " 
  146               << std::quoted(inputfilenames.front()));
 
  152       for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
 
  153         auto* br = 
dynamic_cast<TBranchElement*
>(brObj);
 
  155         if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) == 
"FileMetaData")
 
  159         if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
 
  160           B2ERROR(
"Branch " << std::quoted(br->GetName()) << 
" in persistent tree not inheriting from Mergable");
 
  165         br->SetAddress(&
object);
 
  166         if(br->GetEntry(0)<=0) {
 
  167           B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) << 
" of entry 0 from persistent tree in " 
  168               << std::quoted(input));
 
  172         auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
 
  175             it.first->second.first->merge(
object);
 
  176           }
catch(std::exception &e){
 
  177             B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) << 
" in " << std::quoted(input) << 
": " << e.what());
 
  179           it.first->second.second++;
 
  183           B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) << 
" in persistent tree");
 
  187       std::string release = fileMetaData.getRelease();
 
  189         B2ERROR(
"Cannot determine release used to create " <<  std::quoted(input));
 
  191       }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(), 
"-modified")){
 
  192         B2WARNING(
"File " << std::quoted(input) << 
" created with modified software " 
  193                   <<  fileMetaData.getRelease()
 
  194                   << 
": cannot verify that files are compatible");
 
  195         release = release.substr(0, release.size() - std::string(
"-modified").size());
 
  199       if (!outputMetaData) {
 
  202         outputRelease = release;
 
  205         if(release != outputRelease) {
 
  206           B2ERROR(
"Release in " << std::quoted(input) << 
" differs from previous files: " <<
 
  207                   fileMetaData.getRelease() << 
" != " << outputMetaData->getRelease());
 
  209         if(fileMetaData.getSteering() != outputMetaData->getSteering()){
 
  211           B2ERROR(
"Steering file for " << std::quoted(input) << 
" differs from previous files.");
 
  213         if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
 
  218           if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
 
  219             outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
 
  221             B2ERROR(
"Database globalTag in " << std::quoted(input) << 
" differs from previous files: " <<
 
  222                     fileMetaData.getDatabaseGlobalTag() << 
" != " << outputMetaData->getDatabaseGlobalTag());
 
  225         if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
 
  227           for (
const auto& descrPair : outputMetaData->getDataDescription())
 
  228             cur.put(descrPair.first, descrPair.second);
 
  230           for (
const auto& descrPair : fileMetaData.getDataDescription())
 
  231             prev.put(descrPair.first, descrPair.second);
 
  233           B2ERROR(
"dataDescription in " << std::quoted(input) << 
" differs from previous files:\n" << cur.string() << 
" vs.\n" << prev.string());
 
  235         if(fileMetaData.isMC() != outputMetaData->isMC()){
 
  236           B2ERROR(
"Type (real/MC) for " << std::quoted(input) << 
" differs from previous files.");
 
  239         outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
 
  240         outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
 
  241         outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
 
  243       if(fileMetaData.getNEvents() < 1) {
 
  244         B2WARNING(
"File " << std::quoted(input) << 
" is empty.");
 
  247         EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
 
  248         EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
 
  249         if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
 
  250         if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
 
  253       auto it = allSeeds.insert(fileMetaData.getRandomSeed());
 
  255         B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) << 
" present in more then one file");
 
  257       allUsers.insert(fileMetaData.getUser());
 
  259       for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
 
  260         allParents.insert(fileMetaData.getParent(i));
 
  262     }
catch(std::exception &e) {
 
  263       B2ERROR(
"input file " << std::quoted(input) << 
": " << e.what());
 
  268   for(
const auto &val: persistentMergeables){
 
  269     if(val.second.second != inputfilenames.size()){
 
  270       B2ERROR(
"Mergeable " << std::quoted(val.first) << 
" only present in " << val.second.second << 
" out of " 
  271               << inputfilenames.size() << 
" files");
 
  276   if(allUsers.size()>1) {
 
  277       B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers, 
", "));
 
  289       B2FATAL(
"For some reason no files could be processed");
 
  293     B2WARNING(
"All Files were empty");
 
  294     lowEvt = EventInfo{-1, -1, 0};
 
  295     highEvt = EventInfo{-1, -1, 0};
 
  299   outputMetaData->setLfn(
"");
 
  300   outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
 
  301   outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
 
  302   outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
 
  304   if(inputfilenames.size()>1){
 
  305       outputMetaData->setRandomSeed(
"");
 
  311   TFile output(outputfilename.c_str(), 
"RECREATE");
 
  312   if (output.IsZombie()) {
 
  313     B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
 
  317   TTree* outputEventTree{
nullptr};
 
  318   for (
const auto& input : inputfilenames) {
 
  319     B2INFO(
"processing events from " << std::quoted(input));
 
  320     TFile tfile(input.c_str());
 
  321     auto* tree = 
dynamic_cast<TTree*
>(tfile.Get(
"tree"));
 
  322     if(!outputEventTree){
 
  324       outputEventTree = tree->CloneTree(0);
 
  326       outputEventTree->CopyAddresses(tree);
 
  331     outputEventTree->CopyEntries(tree, -1, 
"fast SortBasketsByEntry BuildIndexOnError");
 
  333     outputEventTree->CopyAddresses(tree, 
true);
 
  338   assert(outputEventTree);
 
  340   if(!outputEventTree->GetTreeIndex()) {
 
  341     B2INFO(
"No Index found: building new index");
 
  346   outputEventTree->Write();
 
  347   B2INFO(
"Done processing events");
 
  351   if (outputMetaData->getNFullEvents() == 0) {
 
  352     outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
 
  356   outputMetaData->setLfn(fs::absolute(outputfilename).
string());
 
  358   if(variables.count(
"add-to-catalog")>0) {
 
  361   B2INFO(
"Writing FileMetaData");
 
  364   TTree outputMetaDataTree(
"persistent", 
"persistent");
 
  365   outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
 
  366   for(
auto &it: persistentMergeables){
 
  367     outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
 
  369   outputMetaDataTree.Fill();
 
  370   outputMetaDataTree.Write();
 
  373   for(
const auto& val: persistentMergeables){
 
  374     delete val.second.first;
 
  376   persistentMergeables.clear();
 
  377   delete outputMetaData;
 
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
Abstract base class for different kinds of events.
int main(int argc, char **argv)
Run all tests.