8#include <framework/dataobjects/FileMetaData.h>
9#include <framework/io/RootIOUtilities.h>
10#include <framework/io/RootFileInfo.h>
11#include <framework/logging/Logger.h>
12#include <framework/pcore/Mergeable.h>
13#include <framework/core/FileCatalog.h>
14#include <framework/utilities/KeyValuePrinter.h>
15#include <framework/core/MetadataService.h>
17#include <boost/program_options.hpp>
18#include <boost/algorithm/string.hpp>
22#include <TBranchElement.h>
33namespace po = boost::program_options;
34namespace fs = std::filesystem;
38using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Remove every occurrence of the Legacy_IP_Information entry from a globaltag
 * string, together with a comma directly in front of it if there is one.
 *
 * A comma that follows the entry is left in place, so an entry at the very
 * start of a list leaves a leading comma behind.
 *
 * @param globaltags globaltag string to clean up
 * @return copy of @p globaltags with the legacy entry removed
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // The pattern never changes, so compile it once on first use instead of
  // rebuilding the regex on every call.
  static const std::regex legacy_gt(",?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
// Entry point of the merge tool. The visible statements parse the command
// line, check every input file against the previous ones, copy the event
// trees of all inputs into the output file and finally write a persistent
// tree holding the combined FileMetaData and the merged Mergeable objects.
// NOTE(review): this listing has fused line numbers and dropped lines (all
// brace-only lines and several statements are missing), so the comments
// below describe only what is visible and hedge where a guard is missing.
50int main(
int argc,
char* argv[])
// Variables filled by the option parser.
53 std::string outputfilename;
54 std::vector<std::string> inputfilenames;
55 std::string jsonfilename;
// Declaration of the supported command line options.
56 po::options_description options(
"Options");
58 (
"help,h",
"print all available options")
59 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
60 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
61 (
"force,f",
"overwrite existing file")
62 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
63 (
"add-to-catalog",
"register the output file in the file catalog")
64 (
"job-information", po::value<std::string>(&jsonfilename),
"create json file with metadata of output file and execution status")
65 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// Positional arguments: the first one is the output file, all following
// ones are input files.
66 po::positional_options_description positional;
67 positional.add(
"output", 1);
68 positional.add(
"file", -1);
69 po::variables_map variables;
70 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
71 po::notify(variables);
// Print the usage text and the option list when help was requested or when
// the output name or the input names are missing.
72 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
73 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
74 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
75 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
76 std::cout << options << std::endl;
78This program is intended to merge files created by separate basf2 jobs. It's
79similar to hadd but does correctly update the metadata in the file and merges
80the objects in the persistent tree correctly.
82The following restrictions apply:
83 - The files have to be created with the same release and steering file
84 - The persistent tree is only allowed to contain FileMetaData and objects
85 inheriting from Mergeable and the same list of objects needs to be present
87 - The event tree needs to contain the same DataStore entries in all files.
// A job information file name was given (the body of this branch is not
// visible in this listing).
94 if (!jsonfilename.empty()) {
// The quiet option was given (the body of this branch is not visible in
// this listing).
103 if(variables.count(
"quiet")>0){
107 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// An already existing output file is reported as an error unless the force
// option was given.
109 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
110 B2ERROR(
"Output file exists, use -f to force overwriting it");
// State accumulated over all input files.
// Parents collected from the FileMetaData of every input.
118 std::set<std::string> allParents;
// Per persistent branch name: the stored Mergeable and a file counter.
121 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds seen so far.
123 std::set<std::string> allSeeds;
// Users that created the input files.
125 std::set<std::string> allUsers;
// Lowest and highest (experiment, run, event) found so far.
127 std::optional<EventInfo> lowEvt, highEvt;
// Reference branch names per tree, taken from the first file providing them.
130 std::map<std::string, std::set<std::string>> allEventBranches;
// Reference tree names, taken from the first file.
133 std::set<std::string> allEventTrees;
// Release written to the output metadata at the end.
135 std::string outputRelease;
// First pass: inspect every input file and compare it with the previous ones.
141 for (
const auto& input : inputfilenames) {
145 const auto &fileMetaData = fileInfo.getFileMetaData();
146 auto description = fileMetaData.getDataDescription();
// Look up the isNtupleMetaData key of the data description; it selects
// below how the branch names of the tree called tree are obtained.
147 auto isNtuple = description.find(
"isNtupleMetaData");
149 B2INFO(
"adding file " << std::quoted(input));
// The set of tree names has to be the same in all files; the first file
// defines the reference set.
151 auto trees = fileInfo.getTreeNames();
152 if(allEventTrees.empty()) {
153 std::swap(allEventTrees,trees);
155 if(trees!=allEventTrees){
156 B2ERROR(
"Trees in " << std::quoted(input) <<
" differ from "
157 << std::quoted(inputfilenames.front()));
// Same check for the branches of every tree. getBranchNames() is used for
// the tree called tree unless isNtupleMetaData is set to True, in all other
// cases getNtupleBranchNames(tree) is used.
161 for(
const auto& tree : allEventTrees) {
162 auto branches = ((tree==
"tree") &&
163 ((isNtuple==description.end()) || (isNtuple->second !=
"True"))
164 ) ? fileInfo.getBranchNames() : fileInfo.getNtupleBranchNames(tree);
165 if(branches.empty()) {
166 throw std::runtime_error(
"Could not find any branches in " + tree);
168 if(allEventBranches[tree].empty()) {
169 std::swap(allEventBranches[tree],branches);
171 if(branches!=allEventBranches[tree]){
172 B2ERROR(
"Branches in " << std::quoted(input +
":" + tree) <<
" differ from "
173 << std::quoted(inputfilenames.front() +
":" + tree));
// Walk over the branches of the persistent tree. The FileMetaData branch is
// special-cased (its handling is not visible here); every other branch has
// to hold a class inheriting from Mergeable.
179 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
180 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
182 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
186 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
187 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergeable");
// Read entry 0 of the branch into object (its declaration is not visible).
192 br->SetAddress(&
object);
193 if(br->GetEntry(0)<=0) {
194 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
195 << std::quoted(input));
// Try to store the object under its branch name with a file count of 1.
199 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
// Merge into the stored object and increase its file count. Presumably this
// only runs when the name was already present; the guarding condition is
// not visible in this listing.
202 it.first->second.first->merge(
object);
203 }
catch(std::exception &e){
204 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
206 it.first->second.second++;
210 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Release handling: a release ending in -modified triggers a warning and
// the suffix is removed before the release is compared or stored.
214 std::string release = fileMetaData.getRelease();
216 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
218 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
219 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
220 << fileMetaData.getRelease()
221 <<
": cannot verify that files are compatible");
222 release = release.substr(0, release.size() - std::string(
"-modified").size());
// No output metadata exists yet: remember the release of this file.
226 if (!outputMetaData) {
229 outputRelease = release;
// Compare release, steering file, globaltag, data description and the
// real/MC flag of this file with the output metadata.
232 if(release != outputRelease) {
233 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
234 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
236 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
238 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
240 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
// Globaltags that are equal once the legacy entry is removed are accepted
// and the output globaltag is stored without the legacy entry.
245 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
246 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
248 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
249 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
// On differing data descriptions both key/value lists are collected (cur
// and prev are declared on lines not visible here) and printed in the error.
252 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
254 for (
const auto& descrPair : outputMetaData->getDataDescription())
255 cur.put(descrPair.first, descrPair.second);
257 for (
const auto& descrPair : fileMetaData.getDataDescription())
258 prev.put(descrPair.first, descrPair.second);
260 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
262 if(fileMetaData.isMC() != outputMetaData->isMC()){
263 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counters of this file to the output metadata.
266 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
267 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
268 outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
270 if(fileMetaData.getNEvents() < 1) {
271 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Update the lowest and highest (experiment, run, event) seen so far.
274 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
275 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
276 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
277 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Record the random seed; the warning below reports a seed shared by
// several files (its guarding condition is not visible in this listing).
280 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
282 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
284 allUsers.insert(fileMetaData.getUser());
// Collect the parents of this file.
286 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
287 allParents.insert(fileMetaData.getParent(i));
289 }
catch(std::exception &e) {
290 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable has to be present in all input files.
295 for(
const auto &val: persistentMergeables){
296 if(val.second.second != inputfilenames.size()){
297 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
298 << inputfilenames.size() <<
" files");
303 if(allUsers.size()>1) {
304 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
// The conditions guarding the next two messages are not visible here.
316 B2FATAL(
"For some reason no files could be processed");
// All files empty: both ends of the event range are set to (-1, -1, 0).
320 B2WARNING(
"All Files were empty");
321 lowEvt = EventInfo{-1, -1, 0};
322 highEvt = EventInfo{-1, -1, 0};
// Finalise the output metadata: empty LFN, collected parents and the
// overall event range.
326 outputMetaData->setLfn(
"");
327 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
328 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
329 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// With more than one input file the random seed is cleared.
331 if(inputfilenames.size()>1){
332 outputMetaData->setRandomSeed(
"");
336 outputMetaData->setRelease(outputRelease);
// Open the output file in RECREATE mode.
340 auto output = std::unique_ptr<TFile>{TFile::Open(outputfilename.c_str(),
"RECREATE")};
341 if (output ==
nullptr or output->IsZombie()) {
342 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
// Second pass: for every tree name copy the entries of all input files
// into one output tree.
346 for (
const auto& treeName : allEventTrees) {
347 TTree* outputEventTree{
nullptr};
348 for (
const auto& input : inputfilenames) {
349 B2INFO(
"processing events from " << std::quoted(input +
":" + treeName));
350 auto tfile = std::unique_ptr<TFile>{TFile::Open(input.c_str(),
"READ")};
353 auto* tree =
dynamic_cast<TTree*
>(tfile->Get(treeName.c_str()));
// The output tree is created from the first input via CloneTree(0).
354 if (!outputEventTree){
356 outputEventTree = tree->CloneTree(0);
358 outputEventTree->CopyAddresses(tree);
// Copy all entries of this input into the output tree.
363 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
365 outputEventTree->CopyAddresses(tree,
true);
370 assert(outputEventTree);
// The output tree has no index yet (the statement building it is not
// visible in this listing).
372 if(!outputEventTree->GetTreeIndex()) {
373 B2INFO(
"No Index found: building new index");
378 outputEventTree->Write();
// No full events were summed up from the inputs: count the entries of the
// output tree with EventMetaData.m_errorFlag == 0 instead.
381 if (outputMetaData->getNFullEvents() == 0) {
382 outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
386 B2INFO(
"Done processing events");
// Store the absolute path of the output file as LFN.
389 outputMetaData->setLfn(fs::absolute(outputfilename).
string());
// The add-to-catalog option was given (the body of this branch is not
// visible in this listing).
391 if(variables.count(
"add-to-catalog")>0) {
394 B2INFO(
"Writing FileMetaData");
// Write the persistent tree: one FileMetaData branch plus one branch per
// mergeable, filled with a single entry.
397 TTree outputMetaDataTree(
"persistent",
"persistent");
398 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
399 for(
auto &it: persistentMergeables){
400 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
402 outputMetaDataTree.Fill();
403 outputMetaDataTree.Write();
// Free the merged objects, then keep a copy of the output metadata before
// deleting the original object.
406 for(
const auto& val: persistentMergeables){
407 delete val.second.first;
409 persistentMergeables.clear();
410 auto outputMetaDataCopy = *outputMetaData;
411 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes either EventMetaData branch exists or is a ntuple tree).
Abstract base class for different kinds of events.