8#include <framework/dataobjects/FileMetaData.h>
9#include <framework/io/RootIOUtilities.h>
10#include <framework/io/RootFileInfo.h>
11#include <framework/logging/Logger.h>
12#include <framework/pcore/Mergeable.h>
13#include <framework/core/FileCatalog.h>
14#include <framework/utilities/KeyValuePrinter.h>
15#include <framework/core/MetadataService.h>
17#include <boost/program_options.hpp>
18#include <boost/algorithm/string.hpp>
22#include <TBranchElement.h>
23#include <TDirectory.h>
// Short aliases for the boost program_options and std::filesystem namespaces.
37namespace po = boost::program_options;
38namespace fs = std::filesystem;
// Event identifier; main() fills it in the order (experiment, run, event) and
// relies on the lexicographic tuple comparison to find the lowest and highest event.
42using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Remove the legacy IP globaltag from a globaltag string.
 *
 * Every occurrence of "Legacy_IP_Information" is removed, together with the
 * comma directly in front of it if there is one. A comma following the tag is
 * left untouched.
 *
 * @param globaltags globaltag string to clean up
 * @return copy of globaltags without the legacy tag
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // Building a std::regex is costly and the pattern never changes, so compile
  // it once instead of on every call (this is called repeatedly per input file).
  static const std::regex legacy_gt(",?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
// Recursively gather the histograms stored below a ROOT directory.
//   dir        - directory whose list of keys is scanned
//   prefix     - path of dir relative to the start of the scan, empty at the top level
//   histograms - map from histogram path to (stored histogram, counter); the counter
//                is set to 1 when a path is inserted for the first time
// NOTE(review): this listing omits some source lines (the embedded line numbers
// have gaps), so any null checks, the else branch and the counter update for
// already known paths are not visible here - confirm against the full file.
59 void collectHistograms(TDirectory& dir,
const std::string& prefix,
60 std::map<std::string, std::pair<TH1*, size_t>>& histograms)
62 for (TObject* keyObj : *dir.GetListOfKeys()) {
63 auto* key =
dynamic_cast<TKey*
>(keyObj);
// Full path of the object: parent prefix and key name joined by a slash.
65 const std::string name{key->GetName()};
66 const std::string path = prefix.empty() ? name : (prefix +
"/" + name);
67 TClass* cls = TClass::GetClass(key->GetClassName());
// Sub-directories are descended into, using the current path as the new prefix.
69 if (cls->InheritsFrom(TDirectory::Class())) {
70 auto* subdir =
dynamic_cast<TDirectory*
>(key->ReadObj());
71 if (subdir) collectHistograms(*subdir, path, histograms);
72 }
// Anything inheriting from TH1 is read and looked up in the map by its path.
else if (cls->InheritsFrom(TH1::Class())) {
73 auto* hist =
dynamic_cast<TH1*
>(key->ReadObj());
75 auto it = histograms.find(path);
// Path not seen before: detach the histogram from its directory and store it with a count of 1.
76 if (it == histograms.end()) {
77 hist->SetDirectory(
nullptr);
78 histograms.emplace(path, std::make_pair(hist,
size_t{1}));
// Add this histogram onto the one already stored under the same path
// (presumably the branch for an already known path - the branch line is elided).
80 it->second.first->Add(hist);
// Write all collected histograms via their Write() method.
//   output     - file that serves as the root of the directory lookup
//   histograms - map from histogram path to (histogram, counter); only the histogram is used here
// For paths containing a slash, everything before the last slash is treated as a
// directory path and walked component by component starting from the output file.
// NOTE(review): this listing omits some source lines (the embedded line numbers
// have gaps): the declaration of start, the condition around mkdir and the
// advancing of start are not visible - confirm against the full file.
92 void writeHistograms(TFile& output,
93 const std::map<std::string, std::pair<TH1*, size_t>>& histograms)
95 for (
const auto& [path, histCount] : histograms) {
// Position of the last slash separates the directory part from the histogram name.
97 const auto slash = path.rfind(
'/');
98 if (slash != std::string::npos) {
100 TDirectory* dir = &output;
102 const std::string dirpath = path.substr(0, slash);
// Split dirpath at each slash and look up the sub-directory for every component.
103 while (start < dirpath.size()) {
104 const size_t end = dirpath.find(
'/', start);
105 const std::string part = (end == std::string::npos)
106 ? dirpath.substr(start) : dirpath.substr(start, end - start);
107 TDirectory* sub = dir->GetDirectory(part.c_str());
// Create the component and fetch it again (presumably only when the lookup above failed).
109 dir->mkdir(part.c_str());
110 sub = dir->GetDirectory(part.c_str());
// No further slash: this was the last component.
114 if (end == std::string::npos)
break;
119 histCount.first->Write();
// Entry point of the merge tool. The visible statements parse the command line,
// inspect every input file and check it against the previous ones, and then write
// the merged event trees, the merged histograms and a persistent tree holding the
// FileMetaData and all Mergeable objects to the output file.
// NOTE(review): this listing omits some source lines (the embedded line numbers
// have gaps), so the control flow between the statements below is only partially
// visible - confirm details against the full file.
125int main(
int argc,
char* argv[])
// Variables filled by the option parser.
128 std::string outputfilename;
129 std::vector<std::string> inputfilenames;
130 std::string jsonfilename;
// Declare the supported command line options.
131 po::options_description options(
"Options");
132 options.add_options()
133 (
"help,h",
"print all available options")
134 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
135 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
136 (
"force,f",
"overwrite existing file")
137 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
138 (
"add-to-catalog",
"register the output file in the file catalog")
139 (
"job-information", po::value<std::string>(&jsonfilename),
"create json file with metadata of output file and execution status")
140 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// The first positional argument is the output file, all remaining ones are input files.
141 po::positional_options_description positional;
142 positional.add(
"output", 1);
143 positional.add(
"file", -1);
144 po::variables_map variables;
145 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
146 po::notify(variables);
// Print the usage text if help was requested or if the output file or the input files are missing.
147 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
148 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
149 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
150 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
151 std::cout << options << std::endl;
153This program is intended to merge files created by separate basf2 jobs. It's
154similar to hadd but does correctly update the metadata in the file and merges
155the objects in the persistent tree correctly.
157The following restrictions apply:
158 - The files have to be created with the same release and steering file
159 - The persistent tree is only allowed to contain FileMetaData and objects
160 inheriting from Mergeable and the same list of objects needs to be present
162 - The event tree needs to contain the same DataStore entries in all files.
// Branch taken when a file name was given via --job-information (body not shown in this listing).
169 if (!jsonfilename.empty()) {
// Branch taken when --quiet was given (body not shown in this listing).
178 if(variables.count(
"quiet")>0){
182 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// An already existing output file is reported as an error unless --force was given.
184 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
185 B2ERROR(
"Output file exists, use -f to force overwriting it");
// State accumulated over all input files.
// Parents of all input files.
193 std::set<std::string> allParents;
// Mergeable objects from the persistent tree by branch name, with the number of files containing them.
196 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds of all input files.
198 std::set<std::string> allSeeds;
// Users that created the input files.
200 std::set<std::string> allUsers;
// Lowest and highest event seen so far.
202 std::optional<EventInfo> lowEvt, highEvt;
// Branch names per tree, used as reference for the following files.
205 std::map<std::string, std::set<std::string>> allEventBranches;
// Tree names, used as reference for the following files.
208 std::set<std::string> allEventTrees;
// Histograms collected from all input files by path, with a counter.
210 std::map<std::string, std::pair<TH1*, size_t>> mergedHistograms;
// Release that is written to the output metadata.
212 std::string outputRelease;
// First pass: inspect every input file, check it against the previous ones and accumulate metadata.
218 for (
const auto& input : inputfilenames) {
222 const auto &fileMetaData = fileInfo.getFileMetaData();
223 auto description = fileMetaData.getDataDescription();
224 auto isNtuple = description.find(
"isNtupleMetaData");
226 B2INFO(
"adding file " << std::quoted(input));
// The set of trees is taken over while no reference exists yet; a file with a
// different set of trees is reported as differing from the first input file.
228 auto trees = fileInfo.getTreeNames();
229 if(allEventTrees.empty()) {
230 std::swap(allEventTrees,trees);
232 if(trees!=allEventTrees){
233 B2ERROR(
"Trees in " << std::quoted(input) <<
" differ from "
234 << std::quoted(inputfilenames.front()));
// Same check for the branches of each tree. getBranchNames() is used for the tree
// whose name equals tree when the data description has no isNtupleMetaData entry
// equal to True; otherwise getNtupleBranchNames(tree) is used.
238 for(
const auto& tree : allEventTrees) {
239 auto branches = ((tree==
"tree") &&
240 ((isNtuple==description.end()) || (isNtuple->second !=
"True"))
241 ) ? fileInfo.getBranchNames() : fileInfo.getNtupleBranchNames(tree);
242 if(branches.empty()) {
243 throw std::runtime_error(
"Could not find any branches in " + tree);
245 if(allEventBranches[tree].empty()) {
246 std::swap(allEventBranches[tree],branches);
248 if(branches!=allEventBranches[tree]){
249 B2ERROR(
"Branches in " << std::quoted(input +
":" + tree) <<
" differ from "
250 << std::quoted(inputfilenames.front() +
":" + tree));
// Collect the histograms stored in this file into mergedHistograms.
255 collectHistograms(fileInfo.getFile(),
"", mergedHistograms);
// Walk over the branches of the persistent tree.
259 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
260 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
// The FileMetaData branch is recognised by class and name (its handling is elided in this listing).
262 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
// Every other branch has to hold an object inheriting from Mergeable.
266 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
267 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergeable");
// Read entry 0 of the branch into object.
272 br->SetAddress(&
object);
273 if(br->GetEntry(0)<=0) {
274 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
275 << std::quoted(input));
// Register the object under its branch name with a file count of 1.
// NOTE(review): the condition separating a successful first insertion from merging
// into an existing entry is not visible in this listing.
279 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
// Merge the object into the stored one; an exception thrown while merging is fatal.
282 it.first->second.first->merge(
object);
283 }
catch(std::exception &e){
284 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
// Count one more file containing this mergeable.
286 it.first->second.second++;
290 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Determine the release of this file. A trailing -modified is warned about and
// removed from the local copy before the release is compared further below.
294 std::string release = fileMetaData.getRelease();
296 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
298 }
else if (fileMetaData.getRelease().ends_with(
"-modified")) {
299 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
300 << fileMetaData.getRelease()
301 <<
": cannot verify that files are compatible");
302 release = release.substr(0, release.size() - std::string(
"-modified").size());
// No output metadata exists yet: the release of this file becomes the reference
// (the creation of outputMetaData is elided in this listing).
306 if (!outputMetaData) {
309 outputRelease = release;
// Consistency checks against the metadata accumulated so far: release, steering
// file, database globaltag, data description and real/MC type.
312 if(release != outputRelease) {
313 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
314 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
316 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
318 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
320 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
// Globaltags that are equal after removeLegacyGt() are accepted and the output
// metadata is set to the version without the legacy tag.
325 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
326 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
328 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
329 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
// Differing data descriptions: both are put into printers (cur, prev; their
// declarations are elided) so that the error message can show them.
332 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
334 for (
const auto& descrPair : outputMetaData->getDataDescription())
335 cur.put(descrPair.first, descrPair.second);
337 for (
const auto& descrPair : fileMetaData.getDataDescription())
338 prev.put(descrPair.first, descrPair.second);
340 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
342 if(fileMetaData.isMC() != outputMetaData->isMC()){
343 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counts of this file to the output metadata.
346 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
347 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
348 outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
// Files without events only produce a warning.
350 if(fileMetaData.getNEvents() < 1) {
351 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Track the lowest and highest (experiment, run, event) over the input files.
354 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
355 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
356 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
357 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Remember the random seed; the warning below reports a seed that occurs in several
// files (the check of the insert result is elided in this listing).
// NOTE(review): the message text reads then where than is probably meant.
360 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
362 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
364 allUsers.insert(fileMetaData.getUser());
// Remember all parents of this file.
366 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
367 allParents.insert(fileMetaData.getParent(i));
// Exceptions raised while inspecting an input file are reported as errors for that file.
369 }
catch(std::exception &e) {
370 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable has to be present in all input files.
375 for(
const auto &val: persistentMergeables){
376 if(val.second.second != inputfilenames.size()){
377 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
378 << inputfilenames.size() <<
" files");
// More than one distinct user only produces a warning.
383 if(allUsers.size()>1) {
384 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
396 B2FATAL(
"For some reason no files could be processed");
// All inputs empty: use (-1, -1, 0) for both ends of the event range.
400 B2WARNING(
"All Files were empty");
401 lowEvt = EventInfo{-1, -1, 0};
402 highEvt = EventInfo{-1, -1, 0};
// Finalise the output metadata: clear the LFN, set the parents and the event range.
406 outputMetaData->
setLfn(
"");
407 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
408 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
409 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// Clear the random seed when more than one file is merged.
411 if(inputfilenames.size()>1){
412 outputMetaData->setRandomSeed(
"");
416 outputMetaData->setRelease(outputRelease);
// Open the output file in RECREATE mode.
420 auto output = std::unique_ptr<TFile>{TFile::Open(outputfilename.c_str(),
"RECREATE")};
421 if (output ==
nullptr or output->IsZombie()) {
422 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
// Second pass: for every tree name, copy the entries of all input files into one output tree.
426 for (
const auto& treeName : allEventTrees) {
427 TTree* outputEventTree{
nullptr};
428 for (
const auto& input : inputfilenames) {
429 B2INFO(
"processing events from " << std::quoted(input +
":" + treeName));
430 auto tfile = std::unique_ptr<TFile>{TFile::Open(input.c_str(),
"READ")};
433 auto* tree =
dynamic_cast<TTree*
>(tfile->Get(treeName.c_str()));
// While no output tree exists it is created as an empty clone of the input tree;
// CopyAddresses is used with the input tree (presumably in the other branch,
// which is elided in this listing).
434 if (!outputEventTree){
436 outputEventTree = tree->CloneTree(0);
438 outputEventTree->CopyAddresses(tree);
// Copy all entries of this input tree, then call CopyAddresses with true as second argument.
443 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
445 outputEventTree->CopyAddresses(tree,
true);
450 assert(outputEventTree);
// Output tree without an index (the call building the index is elided in this listing).
452 if(!outputEventTree->GetTreeIndex()) {
453 B2INFO(
"No Index found: building new index");
458 outputEventTree->Write();
// A full event count of zero is replaced by the number of entries with EventMetaData.m_errorFlag == 0.
461 if (outputMetaData->getNFullEvents() == 0) {
462 outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
466 B2INFO(
"Done processing events");
// Write the merged histograms, report those not present in every input file and free them.
469 if (!mergedHistograms.empty()) {
470 B2INFO(
"Writing histograms");
471 writeHistograms(*output, mergedHistograms);
472 for (
const auto& [name, histCount] : mergedHistograms) {
473 if (histCount.second != inputfilenames.size()) {
474 B2ERROR(
"Histogram " << std::quoted(name) <<
" only present in "
475 << histCount.second <<
" out of " << inputfilenames.size() <<
" files");
478 for (
auto& [name, histCount] : mergedHistograms) {
479 delete histCount.first;
481 mergedHistograms.clear();
// Set the LFN to the absolute path of the output file.
485 outputMetaData->setLfn(fs::absolute(outputfilename).
string());
// Branch taken when --add-to-catalog was given (body not shown in this listing).
487 if(variables.count(
"add-to-catalog")>0) {
490 B2INFO(
"Writing FileMetaData");
// Persistent tree with a single entry: one FileMetaData branch plus one branch per mergeable.
493 TTree outputMetaDataTree(
"persistent",
"persistent");
494 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
495 for(
auto &it: persistentMergeables){
496 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
498 outputMetaDataTree.Fill();
499 outputMetaDataTree.Write();
// Free the mergeables after the tree has been written.
502 for(
const auto& val: persistentMergeables){
503 delete val.second.first;
505 persistentMergeables.clear();
// Keep a copy of the output metadata before deleting the original object.
506 auto outputMetaDataCopy = *outputMetaData;
507 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes either EventMetaData branch exists or is a ntuple tree).
Abstract base class for different kinds of events.