8#include <framework/dataobjects/FileMetaData.h>
9#include <framework/io/RootIOUtilities.h>
10#include <framework/io/RootFileInfo.h>
11#include <framework/logging/Logger.h>
12#include <framework/pcore/Mergeable.h>
13#include <framework/core/FileCatalog.h>
14#include <framework/utilities/KeyValuePrinter.h>
16#include <boost/program_options.hpp>
17#include <boost/algorithm/string.hpp>
21#include <TBranchElement.h>
31namespace po = boost::program_options;
32namespace fs = std::filesystem;
36using EventInfo = std::tuple<int, int, unsigned int>;
/** Remove the "Legacy_IP_Information" entry from a comma separated globaltag list.
 *
 * The separating comma is removed together with the tag so that no stray
 * separator is left behind, independent of the position of the tag:
 * "a,Legacy_IP_Information" -> "a", "Legacy_IP_Information,a" -> "a".
 *
 * @param globaltags comma separated list of globaltag names
 * @return copy of the list without the legacy globaltag
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // Compiled only once: constructing a std::regex is comparatively expensive.
  // First alternative: tag in first position, drop it with the comma that follows.
  // Second alternative: tag anywhere else, drop it with the comma in front (if any).
  static const std::regex legacy_gt("^Legacy_IP_Information,|,?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
/**
 * Entry point of the file merging tool.
 *
 * Parses the command line, checks in a first loop over all input files that
 * their FileMetaData and the objects in their persistent trees are compatible,
 * then copies the event trees of all inputs into one output file and finally
 * writes the merged FileMetaData and Mergeable objects to a new persistent tree.
 *
 * NOTE(review): several lines of this function (braces, returns, some
 * conditions) are not visible in this listing; the comments below only describe
 * the statements that are shown.
 */
48int main(
int argc,
char* argv[])
// Targets of the command line parsing: output file name and list of input files.
51 std::string outputfilename;
52 std::vector<std::string> inputfilenames;
53 po::options_description options(
"Options");
55 (
"help,h",
"print all available options")
56 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
57 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
58 (
"force,f",
"overwrite existing file")
59 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
60 (
"add-to-catalog",
"register the output file in the file catalog")
61 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// The first positional argument is the output file, all further ones are input files.
62 po::positional_options_description positional;
63 positional.add(
"output", 1);
64 positional.add(
"file", -1);
65 po::variables_map variables;
66 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
67 po::notify(variables);
// Print the usage and the option list if help was requested or if the output
// file or the input files are missing.
68 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
69 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
70 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
71 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
72 std::cout << options << std::endl;
74This program is intended to merge files created by separate basf2 jobs. It's
75similar to hadd but does correctly update the metadata in the file and merges
76the objects in the persistent tree correctly.
78The following restrictions apply:
79 - The files have to be created with the same release and steering file
80 - The persistent tree is only allowed to contain FileMetaData and objects
81 inheriting from Mergeable and the same list of objects needs to be present
83 - The event tree needs to contain the same DataStore entries in all files.
// Handling of the quiet option (the body of this branch is not visible here).
93 if(variables.count(
"quiet")>0){
97 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// An existing output file is an error unless the force option was given.
99 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
100 B2ERROR(
"Output file exists, use -f to force overwriting it");
// Accumulators filled while looping over the input files.
// Parents of all input files.
108 std::set<std::string> allParents;
// Branch name -> (merged object, number of input files the branch was counted in).
111 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds of all input files, used to warn about duplicates.
113 std::set<std::string> allSeeds;
// Users that created the input files.
115 std::set<std::string> allUsers;
// Lowest and highest (experiment, run, event) found in the input metadata.
117 std::optional<EventInfo> lowEvt, highEvt;
// Event tree branch names every input file is compared against.
120 std::set<std::string> allEventBranches;
// Release string the release of every input file is compared against.
122 std::string outputRelease;
// First pass: inspect the metadata and the persistent tree of every input file.
128 for (
const auto& input : inputfilenames) {
132 const auto &fileMetaData = fileInfo.getFileMetaData();
134 B2INFO(
"adding file " << std::quoted(input));
// The set of event tree branches has to be identical for all input files.
137 auto branches = fileInfo.getBranchNames();
138 if(branches.empty()) {
139 throw std::runtime_error(
"Could not find any branches in event tree");
141 if(allEventBranches.empty()) {
142 std::swap(allEventBranches,branches);
144 if(branches!=allEventBranches){
145 B2ERROR(
"Branches in " << std::quoted(input) <<
" differ from "
146 << std::quoted(inputfilenames.front()));
// Loop over the branches of the persistent tree. The FileMetaData branch is
// special-cased (its handling is not visible here); every other branch has to
// hold an object inheriting from Mergeable.
152 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
153 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
155 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
159 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
160 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
// Read entry 0 of the branch into the local object pointer.
165 br->SetAddress(&
object);
166 if(br->GetEntry(0)<=0) {
167 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
168 << std::quoted(input));
// Register the object under its branch name. The visible statements merge the
// object into the stored one (a failing merge is fatal) and increment the
// per-branch file counter; the guarding condition is not visible here.
172 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
175 it.first->second.first->merge(
object);
176 }
catch(std::exception &e){
177 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
179 it.first->second.second++;
183 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Release check: a -modified suffix only triggers a warning and is stripped
// from the local copy before the release is compared.
187 std::string release = fileMetaData.getRelease();
189 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
191 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
192 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
193 << fileMetaData.getRelease()
194 <<
": cannot verify that files are compatible");
195 release = release.substr(0, release.size() - std::string(
"-modified").size());
// No output metadata yet: remember the release for the comparison below.
199 if (!outputMetaData) {
202 outputRelease = release;
// Compatibility checks against the output metadata: release, steering file,
// database globaltag, data description and real/MC type.
205 if(release != outputRelease) {
206 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
207 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
209 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
211 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
// Globaltags that are equal after removeLegacyGt() are accepted; in that case
// the output globaltag is stored with the legacy tag removed.
213 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
218 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
219 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
221 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
222 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
// On a data description mismatch both descriptions are formatted for the error message.
225 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
227 for (
const auto& descrPair : outputMetaData->getDataDescription())
228 cur.put(descrPair.first, descrPair.second);
230 for (
const auto& descrPair : fileMetaData.getDataDescription())
231 prev.put(descrPair.first, descrPair.second);
233 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
235 if(fileMetaData.isMC() != outputMetaData->isMC()){
236 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counts of this file to the output metadata.
239 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
240 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
241 outputMetaData->setNFullEvents(outputMetaData->getNFullEvents() + fileMetaData.getNFullEvents());
243 if(fileMetaData.getNEvents() < 1) {
244 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Update the overall lowest and highest (experiment, run, event).
247 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
248 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
249 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
250 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Collect the random seed (with a duplicate warning), the user and the parents of this file.
253 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
255 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
257 allUsers.insert(fileMetaData.getUser());
259 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
260 allParents.insert(fileMetaData.getParent(i));
262 }
catch(std::exception &e) {
263 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable object has to be counted once per input file.
268 for(
const auto &val: persistentMergeables){
269 if(val.second.second != inputfilenames.size()){
270 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
271 << inputfilenames.size() <<
" files");
276 if(allUsers.size()>1) {
277 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
289 B2FATAL(
"For some reason no files could be processed");
// Together with this warning the event range is set to (-1, -1, 0).
293 B2WARNING(
"All Files were empty");
294 lowEvt = EventInfo{-1, -1, 0};
295 highEvt = EventInfo{-1, -1, 0};
// Finalise the merged metadata: empty LFN for now, all parents and the overall event range.
299 outputMetaData->setLfn(
"");
300 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
301 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
302 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// With more than one input file the random seed of the output is cleared.
304 if(inputfilenames.size()>1){
305 outputMetaData->setRandomSeed(
"");
// Second pass: create the output file and copy the event trees of all inputs.
311 TFile output(outputfilename.c_str(),
"RECREATE");
312 if (output.IsZombie()) {
313 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
317 TTree* outputEventTree{
nullptr};
318 for (
const auto& input : inputfilenames) {
319 B2INFO(
"processing events from " << std::quoted(input));
320 TFile tfile(input.c_str());
321 auto* tree =
dynamic_cast<TTree*
>(tfile.Get(
"tree"));
// While there is no output tree yet it is created with CloneTree(0) from the
// current input tree; CopyAddresses() connects the output tree to the input
// tree before the entries are copied.
322 if(!outputEventTree){
324 outputEventTree = tree->CloneTree(0);
326 outputEventTree->CopyAddresses(tree);
331 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
333 outputEventTree->CopyAddresses(tree,
true);
338 assert(outputEventTree);
// An index is built when the output tree has none after copying.
340 if(!outputEventTree->GetTreeIndex()) {
341 B2INFO(
"No Index found: building new index");
346 outputEventTree->Write();
347 B2INFO(
"Done processing events");
// If the summed full event count is zero, count the entries with
// EventMetaData.m_errorFlag == 0 in the output tree instead.
351 if (outputMetaData->getNFullEvents() == 0) {
352 outputMetaData->setNFullEvents(outputEventTree->GetEntries(
"EventMetaData.m_errorFlag == 0"));
// The LFN becomes the absolute path of the output file. The body of the
// add-to-catalog branch is not visible here; presumably it registers the file
// in the file catalog as the option description says.
356 outputMetaData->setLfn(fs::absolute(outputfilename).
string());
358 if(variables.count(
"add-to-catalog")>0) {
361 B2INFO(
"Writing FileMetaData");
// Write a new persistent tree holding the merged FileMetaData and one branch
// per merged object; Fill() is called once.
364 TTree outputMetaDataTree(
"persistent",
"persistent");
365 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
366 for(
auto &it: persistentMergeables){
367 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
369 outputMetaDataTree.Fill();
370 outputMetaDataTree.Write();
// Delete the merged objects and the output metadata.
373 for(
const auto& val: persistentMergeables){
374 delete val.second.first;
376 persistentMergeables.clear();
377 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
Abstract base class for different kinds of events.