8#include <framework/dataobjects/FileMetaData.h>
9#include <framework/io/RootIOUtilities.h>
10#include <framework/io/RootFileInfo.h>
11#include <framework/logging/Logger.h>
12#include <framework/pcore/Mergeable.h>
13#include <framework/core/FileCatalog.h>
14#include <framework/utilities/KeyValuePrinter.h>
16#include <boost/program_options.hpp>
17#include <boost/algorithm/string.hpp>
21#include <TBranchElement.h>
32namespace po = boost::program_options;
33namespace fs = std::filesystem;
37using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Remove the "Legacy_IP_Information" entry from a comma separated list of globaltags.
 *
 * The comma that joined the entry to its neighbour is removed as well, so the
 * result never starts with a stray separator.
 *
 * @param globaltags comma separated list of globaltag names
 * @return copy of the list with every occurrence of the legacy entry removed
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // First alternative: the legacy tag is the first entry and the separating
  // comma follows it. Second alternative: every other position, where the
  // separating comma (if any) precedes it. Compiled once, regex construction
  // is comparatively expensive.
  static const std::regex legacy_gt("^Legacy_IP_Information,|,?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
/**
 * Entry point of the merge tool.
 *
 * Parses the command line, checks all input files for compatibility while
 * accumulating the merged FileMetaData and the Mergeable objects of the
 * persistent tree, copies the event trees of all inputs into the output file
 * and finally writes a new persistent tree.
 *
 * NOTE(review): several lines of this function are not visible in this view;
 * comments below only describe what the visible lines do.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; argv[0] is printed in the usage message
 */
49int main(
int argc,
char* argv[])
// Command line handling: the output file name and the list of input file
// names are filled in by boost::program_options.
52 std::string outputfilename;
53 std::vector<std::string> inputfilenames;
54 po::options_description options(
"Options");
56 (
"help,h",
"print all available options")
57 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
58 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
59 (
"force,f",
"overwrite existing file")
60 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
61 (
"add-to-catalog",
"register the output file in the file catalog")
62 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// The first positional argument is taken as "output", all remaining ones as "file".
63 po::positional_options_description positional;
64 positional.add(
"output", 1);
65 positional.add(
"file", -1);
66 po::variables_map variables;
67 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
68 po::notify(variables);
// Print the usage and the option list if help was requested or if the output
// name or the input files are missing.
69 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
70 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
71 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
72 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
73 std::cout << options << std::endl;
75This program is intended to merge files created by separate basf2 jobs. It's
76similar to hadd but does correctly update the metadata in the file and merges
77the objects in the persistent tree correctly.
79The following restrictions apply:
80 - The files have to be created with the same release and steering file
81 - The persistent tree is only allowed to contain FileMetaData and objects
82 inheriting from Mergeable and the same list of objects needs to be present
84 - The event tree needs to contain the same DataStore entries in all files.
// Handling of the "quiet" option; the body of this branch is not visible here.
94 if(variables.count(
"quiet")>0){
98 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// Report an error if the output file already exists and "force" was not given.
100 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
101 B2ERROR(
"Output file exists, use -f to force overwriting it");
// Parents collected from every input file.
109 std::set<std::string> allParents;
// Mergeable objects of the persistent trees keyed by branch name; the size_t
// counts how many input files contributed to the object.
112 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds of the input files, used to warn about duplicates.
114 std::set<std::string> allSeeds;
// Users recorded in the metadata of the input files.
116 std::set<std::string> allUsers;
// Lowest and highest (experiment, run, event) tuple over all input files.
118 std::optional<EventInfo> lowEvt, highEvt;
// Branch names of the event tree, taken from the first file that provides them.
121 std::set<std::string> allEventBranches;
// Release of the first accepted file with a trailing "-modified" removed.
123 std::string outputRelease;
// First pass over the inputs: validate each file and accumulate the merged metadata.
129 for (
const auto& input : inputfilenames) {
133 const auto &fileMetaData = fileInfo.getFileMetaData();
135 B2INFO(
"adding file " << std::quoted(input));
// Every input has to provide the same, non-empty set of event tree branches.
138 auto branches = fileInfo.getBranchNames();
139 if(branches.empty()) {
140 throw std::runtime_error(
"Could not find any branches in event tree");
142 if(allEventBranches.empty()) {
143 std::swap(allEventBranches,branches);
145 if(branches!=allEventBranches){
146 B2ERROR(
"Branches in " << std::quoted(input) <<
" differ from "
147 << std::quoted(inputfilenames.front()));
// Inspect the branches of the persistent tree.
153 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
154 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
// Matches the FileMetaData branch itself; what is done with it is not
// visible here (presumably it is skipped - TODO confirm).
156 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
// Any other branch has to hold a class inheriting from Mergeable.
160 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
161 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
// Read entry 0 of the branch into "object".
166 br->SetAddress(&
object);
167 if(br->GetEntry(0)<=0) {
168 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
169 << std::quoted(input));
// Try to register the object under its branch name with a file count of 1.
173 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
// Merge into the object stored under this branch name and count one more
// contributing file; a failing merge is fatal.
176 it.first->second.first->merge(
object);
177 }
catch(std::exception &e){
178 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
180 it.first->second.second++;
184 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Release check: a "-modified" suffix only triggers a warning and is stripped
// from the local copy before releases are compared.
188 std::string release = fileMetaData.getRelease();
190 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
192 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
193 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
194 << fileMetaData.getRelease()
195 <<
": cannot verify that files are compatible");
196 release = release.substr(0, release.size() - std::string(
"-modified").size());
// No output metadata yet: remember the release of this file (the remaining
// lines of this branch are not visible here).
200 if (!outputMetaData) {
203 outputRelease = release;
// Compare release, steering file, globaltag, data description and real/MC
// type with what was accumulated from the previous files.
206 if(release != outputRelease) {
207 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
208 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
210 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
212 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
214 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
// Differing globaltags are still accepted if they agree once
// removeLegacyGt() has been applied; the output then keeps the stripped tag.
219 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
220 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
222 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
223 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
226 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
// Collect both descriptions as key/value pairs so they can be printed in
// the error message.
228 for (
const auto& descrPair : outputMetaData->getDataDescription())
229 cur.put(descrPair.first, descrPair.second);
231 for (
const auto& descrPair : fileMetaData.getDataDescription())
232 prev.put(descrPair.first, descrPair.second);
234 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
236 if(fileMetaData.isMC() != outputMetaData->isMC()){
237 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counters of this file to the output metadata.
240 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
241 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
242 outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
244 if(fileMetaData.getNEvents() < 1) {
245 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Widen the overall (experiment, run, event) range with the low/high values of this file.
248 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
249 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
250 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
251 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Remember the random seed; the warning below reports seeds seen in more than one file.
254 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
256 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
258 allUsers.insert(fileMetaData.getUser());
// Collect all parents of this file.
260 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
261 allParents.insert(fileMetaData.getParent(i));
263 }
catch(std::exception &e) {
264 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable object has to be present in all input files.
269 for(
const auto &val: persistentMergeables){
270 if(val.second.second != inputfilenames.size()){
271 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
272 << inputfilenames.size() <<
" files");
// Warn if the inputs were created by more than one user.
277 if(allUsers.size()>1) {
278 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
290 B2FATAL(
"For some reason no files could be processed");
// Placeholder event range set together with the "All Files were empty"
// warning (the guarding condition is not visible here).
294 B2WARNING(
"All Files were empty");
295 lowEvt = EventInfo{-1, -1, 0};
296 highEvt = EventInfo{-1, -1, 0};
// Finalize the merged metadata: empty LFN for now, collected parents and the overall event range.
300 outputMetaData->setLfn(
"");
301 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
302 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
303 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// With more than one input file the random seed is set to an empty string.
305 if(inputfilenames.size()>1){
306 outputMetaData->setRandomSeed(
"");
// Open the output file in "RECREATE" mode.
312 auto output = std::unique_ptr<TFile>{TFile::Open(outputfilename.c_str(),
"RECREATE")};
313 if (output ==
nullptr or output->IsZombie()) {
314 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
// Second pass over the inputs: copy the events of every file into the output tree.
318 TTree* outputEventTree{
nullptr};
319 for (
const auto& input : inputfilenames) {
320 B2INFO(
"processing events from " << std::quoted(input));
321 auto tfile = std::unique_ptr<TFile>{TFile::Open(input.c_str(),
"READ")};
324 auto* tree =
dynamic_cast<TTree*
>(tfile->Get(
"tree"));
// No output tree yet: create it as a clone of the input tree without copying entries.
325 if (!outputEventTree){
327 outputEventTree = tree->CloneTree(0);
329 outputEventTree->CopyAddresses(tree);
// Copy the entries of this input tree into the output tree.
334 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
336 outputEventTree->CopyAddresses(tree,
true);
341 assert(outputEventTree);
// A merged tree without an index gets a new one (the call itself is not visible here).
343 if(!outputEventTree->GetTreeIndex()) {
344 B2INFO(
"No Index found: building new index");
349 outputEventTree->Write();
350 B2INFO(
"Done processing events");
// If the summed full-event count is zero, count the entries with
// EventMetaData.m_errorFlag == 0 in the merged tree instead.
354 if (outputMetaData->getNFullEvents() == 0) {
355 outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
// Store the absolute path of the output file as LFN.
359 outputMetaData->setLfn(fs::absolute(outputfilename).
string());
// Handling of the "add-to-catalog" option; the body of this branch is not visible here.
361 if(variables.count(
"add-to-catalog")>0) {
364 B2INFO(
"Writing FileMetaData");
// The new persistent tree gets a single entry holding the FileMetaData and
// every merged object, each in a branch named like the original one.
367 TTree outputMetaDataTree(
"persistent",
"persistent");
368 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
369 for(
auto &it: persistentMergeables){
370 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
372 outputMetaDataTree.Fill();
373 outputMetaDataTree.Write();
// Free the merged objects and the output metadata.
376 for(
const auto& val: persistentMergeables){
377 delete val.second.first;
379 persistentMergeables.clear();
380 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
Abstract base class for different kinds of events.