1 #include <framework/dataobjects/FileMetaData.h>
2 #include <framework/io/RootIOUtilities.h>
3 #include <framework/io/RootFileInfo.h>
4 #include <framework/logging/Logger.h>
5 #include <framework/pcore/Mergeable.h>
6 #include <framework/core/FileCatalog.h>
7 #include <framework/utilities/KeyValuePrinter.h>
9 #include <boost/program_options.hpp>
10 #include <boost/filesystem.hpp>
11 #include <boost/algorithm/string.hpp>
15 #include <TBranchElement.h>
24 namespace po = boost::program_options;
25 namespace fs = boost::filesystem;
29 using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Strip the "Legacy_IP_Information" entry from a comma separated globaltag list.
 *
 * Every occurrence of the entry is removed together with a directly preceding
 * comma, if there is one ("a,Legacy_IP_Information" becomes "a"). A comma that
 * follows the entry is kept, and all other text is returned unchanged.
 *
 * @param globaltags comma separated list of globaltag names
 * @return copy of globaltags with the legacy entry removed
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // The pattern never changes, so compile it once on first use instead of
  // rebuilding the std::regex on every call.
  static const std::regex legacy_gt(",?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
41 int main(
int argc,
char* argv[])
44 std::string outputfilename;
45 std::vector<std::string> inputfilenames;
46 po::options_description options(
"Options");
48 (
"help,h",
"print all available options")
49 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
50 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
51 (
"force,f",
"overwrite existing file")
52 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
53 (
"add-to-catalog",
"register the output file in the file catalog")
54 (
"quiet,q",
"if given don't print infos, just warnings and errors");
55 po::positional_options_description positional;
56 positional.add(
"output", 1);
57 positional.add(
"file", -1);
58 po::variables_map variables;
59 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
60 po::notify(variables);
61 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
62 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
63 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
64 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
65 std::cout << options << std::endl;
67 This program is intended to merge files created by separate basf2 jobs. It's
68 similar to hadd but does correctly update the metadata in the file and merges
69 the objects in the persistent tree correctly.
71 The following restrictions apply:
72 - The files have to be created with the same release and steering file
73 - The persistent tree is only allowed to contain FileMetaData and objects
74 inheriting from Mergeable and the same list of objects needs to be present
76 - The event tree needs to contain the same DataStore entries in all files.
86 if(variables.count(
"quiet")>0){
90 B2INFO(
"Merging files into " << std::quoted(outputfilename));
92 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
93 B2ERROR(
"Output file exists, use -f to force overwriting it");
101 std::set<std::string> allParents;
104 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
106 std::set<std::string> allSeeds;
108 std::set<std::string> allUsers;
110 std::optional<EventInfo> lowEvt, highEvt;
113 std::set<std::string> allEventBranches;
115 std::string outputRelease;
121 for (
const auto& input : inputfilenames) {
125 const auto &fileMetaData = fileInfo.getFileMetaData();
127 B2INFO(
"adding file " << std::quoted(input));
130 auto branches = fileInfo.getBranchNames();
131 if(branches.empty()) {
132 throw std::runtime_error(
"Could not find any branches in event tree");
134 if(allEventBranches.empty()) {
135 std::swap(allEventBranches,branches);
137 if(branches!=allEventBranches){
138 B2ERROR(
"Branches in " << std::quoted(input) <<
" differ from "
139 << std::quoted(inputfilenames.front()));
145 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
146 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
148 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
151 if(!br || !br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
152 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
157 br->SetAddress(&
object);
158 if(br->GetEntry(0)<=0) {
159 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
160 << std::quoted(input));
164 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
167 it.first->second.first->merge(
object);
168 }
catch(std::exception &e){
169 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " <<
e.what());
171 it.first->second.second++;
175 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
179 std::string release = fileMetaData.getRelease();
181 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
183 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
184 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
185 << fileMetaData.getRelease()
186 <<
": cannot verify that files are compatible");
187 release = release.substr(0, release.size() - std::string(
"-modified").size());
191 if (!outputMetaData) {
194 outputRelease = release;
197 if(release != outputRelease) {
198 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
199 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
201 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
203 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
205 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
210 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
211 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
213 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
214 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
217 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
219 for (
const auto& descrPair : outputMetaData->getDataDescription())
220 cur.put(descrPair.first, descrPair.second);
222 for (
const auto& descrPair : fileMetaData.getDataDescription())
223 prev.put(descrPair.first, descrPair.second);
225 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
227 if(fileMetaData.isMC() != outputMetaData->isMC()){
228 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
231 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
232 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
234 if(fileMetaData.getNEvents() < 1) {
235 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
238 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
239 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
240 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
241 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
244 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
246 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
248 allUsers.insert(fileMetaData.getUser());
250 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
251 allParents.insert(fileMetaData.getParent(i));
253 }
catch(std::exception &e) {
254 B2ERROR(
"input file " << std::quoted(input) <<
": " <<
e.what());
259 for(
const auto &val: persistentMergeables){
260 if(val.second.second != inputfilenames.size()){
261 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
262 << inputfilenames.size() <<
" files");
267 if(allUsers.size()>1) {
268 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
280 B2FATAL(
"For some reason no files could be processed");
284 B2WARNING(
"All Files were empty");
285 lowEvt = EventInfo{-1, -1, 0};
286 highEvt = EventInfo{-1, -1, 0};
290 outputMetaData->setLfn(
"");
291 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
292 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
293 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
295 if(inputfilenames.size()>1){
296 outputMetaData->setRandomSeed(
"");
302 TFile output(outputfilename.c_str(),
"RECREATE");
303 if (output.IsZombie()) {
304 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
308 TTree* outputEventTree{
nullptr};
309 for (
const auto& input : inputfilenames) {
310 B2INFO(
"processing events from " << std::quoted(input));
311 TFile tfile(input.c_str());
312 auto* tree =
dynamic_cast<TTree*
>(tfile.Get(
"tree"));
313 if(!outputEventTree){
315 outputEventTree = tree->CloneTree(0);
317 outputEventTree->CopyAddresses(tree);
322 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
324 outputEventTree->CopyAddresses(tree,
true);
330 if(!outputEventTree->GetTreeIndex()) {
331 B2INFO(
"No Index found: building new index");
336 outputEventTree->Write();
337 B2INFO(
"Done processing events");
340 outputMetaData->setLfn(fs::absolute(outputfilename, fs::initial_path()).
string());
342 if(variables.count(
"add-to-catalog")>0) {
345 B2INFO(
"Writing FileMetaData");
348 TTree outputMetaDataTree(
"persistent",
"persistent");
349 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
350 for(
auto &it: persistentMergeables){
351 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
353 outputMetaDataTree.Fill();
354 outputMetaDataTree.Write();
357 for(
const auto& val: persistentMergeables){
358 delete val.second.first;
360 persistentMergeables.clear();
361 delete outputMetaData;