8#include <framework/dataobjects/FileMetaData.h>
9#include <framework/io/RootIOUtilities.h>
10#include <framework/io/RootFileInfo.h>
11#include <framework/logging/Logger.h>
12#include <framework/pcore/Mergeable.h>
13#include <framework/core/FileCatalog.h>
14#include <framework/utilities/KeyValuePrinter.h>
16#include <boost/program_options.hpp>
17#include <boost/algorithm/string.hpp>
21#include <TBranchElement.h>
31namespace po = boost::program_options;
32namespace fs = std::filesystem;
36using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Remove the "Legacy_IP_Information" entry from a comma separated list of
 * globaltag names.
 *
 * The entry is removed together with the comma in front of it. If the entry
 * was the first element of the list, the separator that followed it is
 * removed as well, so that the result never gains a leading comma the input
 * did not have.
 *
 * @param globaltags comma separated list of globaltag names
 * @return copy of globaltags without any "Legacy_IP_Information" entry
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // Compile the pattern only once instead of on every call.
  static const std::regex legacyGt(",?Legacy_IP_Information");
  std::string cleaned = std::regex_replace(globaltags, legacyGt, "");
  // A legacy tag in first position leaves its trailing separator behind
  // (",foo"). Drop it, but only if the input did not start with a comma itself.
  const bool inputHadLeadingComma = !globaltags.empty() && globaltags.front() == ',';
  if (!inputHadLeadingComma && !cleaned.empty() && cleaned.front() == ',') {
    cleaned.erase(0, 1);
  }
  return cleaned;
}
48int main(
int argc,
char* argv[])
51 std::string outputfilename;
52 std::vector<std::string> inputfilenames;
53 po::options_description options(
"Options");
55 (
"help,h",
"print all available options")
56 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
57 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
58 (
"force,f",
"overwrite existing file")
59 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
60 (
"add-to-catalog",
"register the output file in the file catalog")
61 (
"quiet,q",
"if given don't print infos, just warnings and errors");
62 po::positional_options_description positional;
63 positional.add(
"output", 1);
64 positional.add(
"file", -1);
65 po::variables_map variables;
66 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
67 po::notify(variables);
68 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
69 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
70 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
71 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
72 std::cout << options << std::endl;
74This program is intended to merge files created by separate basf2 jobs. It's
75similar to hadd but does correctly update the metadata in the file and merges
76the objects in the persistent tree correctly.
78The following restrictions apply:
79 - The files have to be created with the same release and steering file
80 - The persistent tree is only allowed to contain FileMetaData and objects
81 inheriting from Mergeable and the same list of objects needs to be present
83 - The event tree needs to contain the same DataStore entries in all files.
93 if(variables.count(
"quiet")>0){
97 B2INFO(
"Merging files into " << std::quoted(outputfilename));
99 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
100 B2ERROR(
"Output file exists, use -f to force overwriting it");
108 std::set<std::string> allParents;
111 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
113 std::set<std::string> allSeeds;
115 std::set<std::string> allUsers;
117 std::optional<EventInfo> lowEvt, highEvt;
120 std::map<std::string, std::set<std::string>> allEventBranches;
123 std::set<std::string> allEventTrees;
125 std::string outputRelease;
131 for (
const auto& input : inputfilenames) {
135 const auto &fileMetaData = fileInfo.getFileMetaData();
136 auto description = fileMetaData.getDataDescription();
137 auto isNtuple = description.find(
"isNtupleMetaData");
139 B2INFO(
"adding file " << std::quoted(input));
141 auto trees = fileInfo.getTreeNames();
142 if(allEventTrees.empty()) {
143 std::swap(allEventTrees,trees);
145 if(trees!=allEventTrees){
146 B2ERROR(
"Trees in " << std::quoted(input) <<
" differ from "
147 << std::quoted(inputfilenames.front()));
151 for(
const auto& tree : allEventTrees) {
152 auto branches = ((tree==
"tree") &&
153 ((isNtuple==description.end()) || (isNtuple->second !=
"True"))
154 ) ? fileInfo.getBranchNames() : fileInfo.getNtupleBranchNames(tree);
155 if(branches.empty()) {
156 throw std::runtime_error(
"Could not find any branches in " + tree);
158 if(allEventBranches[tree].empty()) {
159 std::swap(allEventBranches[tree],branches);
161 if(branches!=allEventBranches[tree]){
162 B2ERROR(
"Branches in " << std::quoted(input +
":" + tree) <<
" differ from "
163 << std::quoted(inputfilenames.front() +
":" + tree));
169 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
170 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
172 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
176 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
177 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
182 br->SetAddress(&
object);
183 if(br->GetEntry(0)<=0) {
184 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
185 << std::quoted(input));
189 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
192 it.first->second.first->merge(
object);
193 }
catch(std::exception &e){
194 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
196 it.first->second.second++;
200 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
204 std::string release = fileMetaData.getRelease();
206 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
208 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
209 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
210 << fileMetaData.getRelease()
211 <<
": cannot verify that files are compatible");
212 release = release.substr(0, release.size() - std::string(
"-modified").size());
216 if (!outputMetaData) {
219 outputRelease = release;
222 if(release != outputRelease) {
223 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
224 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
226 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
228 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
230 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
235 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
236 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
238 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
239 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
242 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
244 for (
const auto& descrPair : outputMetaData->getDataDescription())
245 cur.put(descrPair.first, descrPair.second);
247 for (
const auto& descrPair : fileMetaData.getDataDescription())
248 prev.put(descrPair.first, descrPair.second);
250 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
252 if(fileMetaData.isMC() != outputMetaData->isMC()){
253 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
256 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
257 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
258 outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
260 if(fileMetaData.getNEvents() < 1) {
261 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
264 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
265 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
266 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
267 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
270 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
272 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
274 allUsers.insert(fileMetaData.getUser());
276 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
277 allParents.insert(fileMetaData.getParent(i));
279 }
catch(std::exception &e) {
280 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
285 for(
const auto &val: persistentMergeables){
286 if(val.second.second != inputfilenames.size()){
287 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
288 << inputfilenames.size() <<
" files");
293 if(allUsers.size()>1) {
294 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
306 B2FATAL(
"For some reason no files could be processed");
310 B2WARNING(
"All Files were empty");
311 lowEvt = EventInfo{-1, -1, 0};
312 highEvt = EventInfo{-1, -1, 0};
316 outputMetaData->setLfn(
"");
317 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
318 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
319 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
321 if(inputfilenames.size()>1){
322 outputMetaData->setRandomSeed(
"");
328 TFile output(outputfilename.c_str(),
"RECREATE");
329 if (output.IsZombie()) {
330 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
334 for (
const auto& treeName : allEventTrees) {
335 TTree* outputEventTree{
nullptr};
336 for (
const auto& input : inputfilenames) {
337 B2INFO(
"processing events from " << std::quoted(input +
":" + treeName));
338 TFile tfile(input.c_str());
339 auto* tree =
dynamic_cast<TTree*
>(tfile.Get(treeName.c_str()));
340 if(!outputEventTree){
342 outputEventTree = tree->CloneTree(0);
344 outputEventTree->CopyAddresses(tree);
349 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
351 outputEventTree->CopyAddresses(tree,
true);
356 assert(outputEventTree);
358 if(!outputEventTree->GetTreeIndex()) {
359 B2INFO(
"No Index found: building new index");
364 outputEventTree->Write();
367 if (outputMetaData->getNFullEvents() == 0) {
368 outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
372 B2INFO(
"Done processing events");
375 outputMetaData->setLfn(fs::absolute(outputfilename).
string());
377 if(variables.count(
"add-to-catalog")>0) {
380 B2INFO(
"Writing FileMetaData");
383 TTree outputMetaDataTree(
"persistent",
"persistent");
384 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
385 for(
auto &it: persistentMergeables){
386 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
388 outputMetaDataTree.Fill();
389 outputMetaDataTree.Write();
392 for(
const auto& val: persistentMergeables){
393 delete val.second.first;
395 persistentMergeables.clear();
396 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
Create human-readable or JSON output for key-value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build a TTreeIndex on the tree (assumes that either an EventMetaData branch exists or the tree is an ntuple tree).
Abstract base class for different kinds of events.