8 #include <framework/dataobjects/FileMetaData.h>
9 #include <framework/io/RootIOUtilities.h>
10 #include <framework/io/RootFileInfo.h>
11 #include <framework/logging/Logger.h>
12 #include <framework/pcore/Mergeable.h>
13 #include <framework/core/FileCatalog.h>
14 #include <framework/utilities/KeyValuePrinter.h>
16 #include <boost/program_options.hpp>
17 #include <boost/filesystem.hpp>
18 #include <boost/algorithm/string.hpp>
22 #include <TBranchElement.h>
31 namespace po = boost::program_options;
32 namespace fs = boost::filesystem;
36 using EventInfo = std::tuple<int, int, unsigned int>;
/** Remove the legacy IP globaltag from a comma separated list of globaltags.
 *
 * Only entries which are exactly equal to "Legacy_IP_Information" are removed,
 * independent of their position in the list. All other entries (including
 * empty ones and tags which merely contain that name) are kept unchanged and
 * in their original order.
 *
 * @param globaltags comma separated list of globaltag names
 * @return the list without any "Legacy_IP_Information" entries, joined by ","
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  static const std::string legacyTag{"Legacy_IP_Information"};
  std::string cleaned;
  cleaned.reserve(globaltags.size());
  bool firstKept = true;
  std::size_t begin = 0;
  while (true) {
    // [begin, end) is the current entry, the last entry runs to the end of the string
    const std::size_t end = globaltags.find(',', begin);
    const std::size_t length = (end == std::string::npos) ? std::string::npos : end - begin;
    if (globaltags.compare(begin, length, legacyTag) != 0) {
      // the separator is only written between kept entries, so removing the
      // first or last entry never leaves a dangling comma behind
      if (!firstKept) cleaned += ',';
      cleaned.append(globaltags, begin, length);
      firstKept = false;
    }
    if (end == std::string::npos) break;
    begin = end + 1;
  }
  return cleaned;
}
// Entry point of the merge tool: parse the command line, inspect all input
// files and check that their metadata is compatible, then copy the events of
// all inputs into one output file and write the merged metadata next to them.
48 int main(
int argc,
char* argv[])
// Command line options. The output file name and the list of input file names
// are stored directly into these two variables by the option parser.
51 std::string outputfilename;
52 std::vector<std::string> inputfilenames;
53 po::options_description options(
"Options");
55 (
"help,h",
"print all available options")
56 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
57 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
58 (
"force,f",
"overwrite existing file")
59 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
60 (
"add-to-catalog",
"register the output file in the file catalog")
61 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// The first positional argument is the output file, all following positional
// arguments are taken as input files.
62 po::positional_options_description positional;
63 positional.add(
"output", 1);
64 positional.add(
"file", -1);
65 po::variables_map variables;
66 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
67 po::notify(variables);
// Print the usage if help was requested or if either the output file or the
// input files are missing.
68 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
69 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
70 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
71 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
72 std::cout << options << std::endl;
74 This program is intended to merge files created by separate basf2 jobs. It's
75 similar to hadd but does correctly update the metadata in the file and merges
76 the objects in the persistent tree correctly.
78 The following restrictions apply:
79 - The files have to be created with the same release and steering file
80 - The persistent tree is only allowed to contain FileMetaData and objects
81 inheriting from Mergeable and the same list of objects needs to be present
83 - The event tree needs to contain the same DataStore entries in all files.
// Handling of the --quiet option.
93 if(variables.count(
"quiet")>0){
97 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// An existing output file is an error unless --force was given.
99 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
100 B2ERROR(
"Output file exists, use -f to force overwriting it");
// State accumulated over all input files.
// Parents of all input files.
108 std::set<std::string> allParents;
// Objects found in the persistent tree, keyed by branch name, together with
// the number of input files the branch was found in.
111 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds of all input files, used to detect duplicates.
113 std::set<std::string> allSeeds;
// Users which created the input files.
115 std::set<std::string> allUsers;
// Lowest and highest (experiment, run, event) over all input files.
117 std::optional<EventInfo> lowEvt, highEvt;
// Branch names of the event tree all files are compared against.
120 std::set<std::string> allEventBranches;
// Release the files are compared against (without any -modified suffix).
122 std::string outputRelease;
// First pass: look at the metadata of every input file.
128 for (
const auto& input : inputfilenames) {
132 const auto &fileMetaData = fileInfo.getFileMetaData();
134 B2INFO(
"adding file " << std::quoted(input));
// The event tree needs at least one branch and the set of branch names has to
// be identical in all files. The first non-empty set becomes the reference.
137 auto branches = fileInfo.getBranchNames();
138 if(branches.empty()) {
139 throw std::runtime_error(
"Could not find any branches in event tree");
141 if(allEventBranches.empty()) {
142 std::swap(allEventBranches,branches);
144 if(branches!=allEventBranches){
145 B2ERROR(
"Branches in " << std::quoted(input) <<
" differ from "
146 << std::quoted(inputfilenames.front()));
// Go through all branches of the persistent tree.
152 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
153 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
// The FileMetaData branch is recognised by its class and its name.
155 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
// Every other branch has to contain an object inheriting from Mergeable.
159 if(!br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
160 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
// Read entry 0 of the branch into object.
165 br->SetAddress(&
object);
166 if(br->GetEntry(0)<=0) {
167 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
168 << std::quoted(input));
// Register the object under its branch name with a file count of one.
// NOTE(review): the merge below presumably only runs when the name was
// already known from a previous file, the condition is not visible here.
172 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
175 it.first->second.first->merge(
object);
176 }
catch(std::exception &e){
177 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
// Count one more file containing this object.
179 it.first->second.second++;
183 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Determine the release of this file. For a release ending in -modified a
// warning is printed and the suffix is removed before comparing releases.
187 std::string release = fileMetaData.getRelease();
189 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
191 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
192 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
193 << fileMetaData.getRelease()
194 <<
": cannot verify that files are compatible");
195 release = release.substr(0, release.size() - std::string(
"-modified").size());
// As long as there is no output metadata this release becomes the reference.
199 if (!outputMetaData) {
202 outputRelease = release;
// Compare the metadata of this file with the output metadata: release,
// steering file, globaltag, data description and real/MC type.
205 if(release != outputRelease) {
206 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
207 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
209 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
211 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
213 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
// Globaltags which are equal after removing the legacy IP tag are accepted,
// the output metadata then gets the globaltag without the legacy tag.
218 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
219 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
221 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
222 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
// Both data descriptions are formatted so they can be shown in the error.
// NOTE(review): cur is filled from the output metadata and prev from the
// current file, which looks swapped with respect to the names - confirm.
225 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
227 for (
const auto& descrPair : outputMetaData->getDataDescription())
228 cur.put(descrPair.first, descrPair.second);
230 for (
const auto& descrPair : fileMetaData.getDataDescription())
231 prev.put(descrPair.first, descrPair.second);
233 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
235 if(fileMetaData.isMC() != outputMetaData->isMC()){
236 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counts of this file to the output metadata.
239 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
240 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
242 if(fileMetaData.getNEvents() < 1) {
243 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Update the overall lowest and highest event with the range of this file.
246 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
247 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
248 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
249 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Remember the random seed of this file and warn about duplicates.
252 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
254 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
256 allUsers.insert(fileMetaData.getUser());
// Collect all parents of this file.
258 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
259 allParents.insert(fileMetaData.getParent(i));
// Exceptions raised while looking at an input file are reported as errors.
261 }
catch(std::exception &e) {
262 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable object has to be present in all input files.
267 for(
const auto &val: persistentMergeables){
268 if(val.second.second != inputfilenames.size()){
269 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
270 << inputfilenames.size() <<
" files");
// Warn if the input files were created by more than one user.
275 if(allUsers.size()>1) {
276 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
288 B2FATAL(
"For some reason no files could be processed");
// If all files were empty the event range is set to experiment -1, run -1, event 0.
292 B2WARNING(
"All Files were empty");
293 lowEvt = EventInfo{-1, -1, 0};
294 highEvt = EventInfo{-1, -1, 0};
// Finalize the output metadata: empty LFN, all collected parents and the
// overall event range.
298 outputMetaData->setLfn(
"");
299 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
300 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
301 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// With more than one input file the random seed is set to an empty string.
303 if(inputfilenames.size()>1){
304 outputMetaData->setRandomSeed(
"");
// Second pass: create the output file and copy the events of all inputs.
310 TFile output(outputfilename.c_str(),
"RECREATE");
311 if (output.IsZombie()) {
312 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
316 TTree* outputEventTree{
nullptr};
317 for (
const auto& input : inputfilenames) {
318 B2INFO(
"processing events from " << std::quoted(input));
319 TFile tfile(input.c_str());
// NOTE(review): tree is used below without a visible null check - confirm
// that a missing or mistyped object named tree is handled in the lines not
// shown here.
320 auto* tree =
dynamic_cast<TTree*
>(tfile.Get(
"tree"));
// For the first input the output tree is created by cloning the input tree
// with zero entries.
321 if(!outputEventTree){
323 outputEventTree = tree->CloneTree(0);
325 outputEventTree->CopyAddresses(tree);
// Copy all entries of the input tree to the output tree.
330 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
332 outputEventTree->CopyAddresses(tree,
true);
// The output tree has no index yet.
338 if(!outputEventTree->GetTreeIndex()) {
339 B2INFO(
"No Index found: building new index");
344 outputEventTree->Write();
345 B2INFO(
"Done processing events");
// The LFN of the output is the absolute path of the output file.
348 outputMetaData->setLfn(fs::absolute(outputfilename, fs::initial_path()).
string());
// Handling of the --add-to-catalog option.
350 if(variables.count(
"add-to-catalog")>0) {
353 B2INFO(
"Writing FileMetaData");
// Write the persistent tree: one branch for the FileMetaData and one branch
// per mergeable object, filled with a single entry.
356 TTree outputMetaDataTree(
"persistent",
"persistent");
357 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
358 for(
auto &it: persistentMergeables){
359 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
361 outputMetaDataTree.Fill();
362 outputMetaDataTree.Write();
// Free the merged objects and the output metadata.
365 for(
const auto& val: persistentMergeables){
366 delete val.second.first;
368 persistentMergeables.clear();
369 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
Abstract base class for different kinds of events.
int main(int argc, char **argv)
Run all tests.