8 #include <framework/dataobjects/FileMetaData.h>
9 #include <framework/io/RootIOUtilities.h>
10 #include <framework/io/RootFileInfo.h>
11 #include <framework/logging/Logger.h>
12 #include <framework/pcore/Mergeable.h>
13 #include <framework/core/FileCatalog.h>
14 #include <framework/utilities/KeyValuePrinter.h>
16 #include <boost/program_options.hpp>
17 #include <boost/filesystem.hpp>
18 #include <boost/algorithm/string.hpp>
22 #include <TBranchElement.h>
31 namespace po = boost::program_options;
32 namespace fs = boost::filesystem;
36 using EventInfo = std::tuple<int, int, unsigned int>;
/**
 * Remove the "Legacy_IP_Information" entry from a globaltag string.
 *
 * Every occurrence of the text "Legacy_IP_Information" is deleted together
 * with one comma directly in front of it, if there is one. A comma that
 * follows the entry is kept, so "Legacy_IP_Information,x" yields ",x".
 *
 * @param globaltags globaltag string as stored in the file metadata
 * @return copy of @p globaltags without the legacy entry
 */
std::string removeLegacyGt(const std::string& globaltags)
{
  // Building a std::regex is expensive and the pattern never changes, so it
  // is compiled once on first use and shared by all later calls.
  static const std::regex legacy_gt(",?Legacy_IP_Information");
  return std::regex_replace(globaltags, legacy_gt, "");
}
/**
 * Entry point of the file merging tool.
 *
 * Parses the command line, inspects every input file (event tree branch
 * names, objects in the persistent tree, FileMetaData) and reports
 * inconsistencies, then copies the event tree entries of all inputs into the
 * output file and writes a "persistent" tree holding the combined
 * FileMetaData and the merged Mergeable objects.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; argv[0] is printed in the usage text
 */
48 int main(
int argc,
char* argv[])
// Targets filled by the option parser below.
51 std::string outputfilename;
52 std::vector<std::string> inputfilenames;
53 po::options_description options(
"Options");
55 (
"help,h",
"print all available options")
56 (
"output,o", po::value<std::string>(&outputfilename),
"output file name")
57 (
"file", po::value<std::vector<std::string>>(&inputfilenames),
"filename to merge")
58 (
"force,f",
"overwrite existing file")
59 (
"no-catalog",
"don't register output file in file catalog, This is now the default")
60 (
"add-to-catalog",
"register the output file in the file catalog")
61 (
"quiet,q",
"if given don't print infos, just warnings and errors");
// First positional argument is the output file, all remaining ones are inputs.
62 po::positional_options_description positional;
63 positional.add(
"output", 1);
64 positional.add(
"file", -1);
65 po::variables_map variables;
66 po::store(po::command_line_parser(argc, argv).options(options).positional(positional).run(), variables);
67 po::notify(variables);
// Print usage and option help when --help is given or output/input names are missing.
68 if (variables.count(
"help") || variables.count(
"output") == 0 || inputfilenames.empty()) {
69 std::cout <<
"Usage: " << argv[0] <<
" [<options>] OUTPUTFILE INPUTFILE [INPUTFILE...]" << std::endl;
70 std::cout <<
" " << argv[0] <<
" [<options>] [--file INPUTFILE...] "
71 <<
"-o OUTPUTFILE [--file INPUTFILE...]" << std::endl << std::endl;
72 std::cout << options << std::endl;
74 This program is intended to merge files created by separate basf2 jobs. It's
75 similar to hadd but does correctly update the metadata in the file and merges
76 the objects in the persistent tree correctly.
78 The following restrictions apply:
79 - The files have to be created with the same release and steering file
80 - The persistent tree is only allowed to contain FileMetaData and objects
81 inheriting from Mergeable and the same list of objects needs to be present
83 - The event tree needs to contain the same DataStore entries in all files.
93 if(variables.count(
"quiet")>0){
97 B2INFO(
"Merging files into " << std::quoted(outputfilename));
// An existing output file is only accepted together with -f/--force.
99 if (fs::exists(outputfilename) && variables.count(
"force")==0) {
100 B2ERROR(
"Output file exists, use -f to force overwriting it");
// Parent entries collected from all input files (see getParent() below).
108 std::set<std::string> allParents;
// Branch name -> (merged object, number of input files that contained it).
111 std::map<std::string, std::pair<Mergeable*, size_t>> persistentMergeables;
// Random seeds seen so far, used to warn about duplicates.
113 std::set<std::string> allSeeds;
// Users recorded in the metadata of the input files.
115 std::set<std::string> allUsers;
// Smallest and largest (experiment, run, event) tuple over all input files.
117 std::optional<EventInfo> lowEvt, highEvt;
// Reference set of event tree branch names, taken from the first input.
120 std::set<std::string> allEventBranches;
// Release of the first input with any "-modified" suffix removed.
122 std::string outputRelease;
// First pass: check every input file and accumulate the merged metadata.
128 for (
const auto& input : inputfilenames) {
132 const auto &fileMetaData = fileInfo.getFileMetaData();
134 B2INFO(
"adding file " << std::quoted(input));
// Event tree branches: must be non-empty and match the reference set.
137 auto branches = fileInfo.getBranchNames();
138 if(branches.empty()) {
139 throw std::runtime_error(
"Could not find any branches in event tree");
// allEventBranches starts empty, so the first branch list becomes the reference.
141 if(allEventBranches.empty()) {
142 std::swap(allEventBranches,branches);
144 if(branches!=allEventBranches){
145 B2ERROR(
"Branches in " << std::quoted(input) <<
" differ from "
146 << std::quoted(inputfilenames.front()));
// Persistent tree: look at every branch and merge the Mergeable objects.
152 for(TObject* brObj: *fileInfo.getPersistentTree().GetListOfBranches()){
153 auto* br =
dynamic_cast<TBranchElement*
>(brObj);
// The FileMetaData branch is recognised by both its class and its name.
155 if(br && br->GetTargetClass() == FileMetaData::Class() && std::string(br->GetName()) ==
"FileMetaData")
// NOTE(review): when the cast above yields a null br, this condition is true
// and br->GetName() in the message below dereferences the null pointer;
// brObj->GetName() looks like the safe choice - confirm and fix.
// NOTE(review): the message spells the class name as Mergable; it is runtime
// output and therefore left unchanged here.
158 if(!br || !br->GetTargetClass()->InheritsFrom(Mergeable::Class())){
159 B2ERROR(
"Branch " << std::quoted(br->GetName()) <<
" in persistent tree not inheriting from Mergable");
// Read entry 0 of the branch into object.
164 br->SetAddress(&
object);
165 if(br->GetEntry(0)<=0) {
166 B2ERROR(
"Could not read branch " << std::quoted(br->GetName()) <<
" of entry 0 from persistent tree in "
167 << std::quoted(input));
// Register the object under its branch name with a file count of 1.
171 auto it = persistentMergeables.insert(std::make_pair(br->GetName(), std::make_pair(
object, 1)));
// Merge into the object already stored under this name and count the file.
174 it.first->second.first->merge(
object);
175 }
catch(std::exception &e){
176 B2FATAL(
"Cannot merge " << std::quoted(br->GetName()) <<
" in " << std::quoted(input) <<
": " << e.what());
178 it.first->second.second++;
182 B2INFO(
"Found mergeable object " << std::quoted(br->GetName()) <<
" in persistent tree");
// Release check: a trailing "-modified" is warned about and stripped before comparing.
186 std::string release = fileMetaData.getRelease();
188 B2ERROR(
"Cannot determine release used to create " << std::quoted(input));
190 }
else if(boost::algorithm::ends_with(fileMetaData.getRelease(),
"-modified")){
191 B2WARNING(
"File " << std::quoted(input) <<
" created with modified software "
192 << fileMetaData.getRelease()
193 <<
": cannot verify that files are compatible");
194 release = release.substr(0, release.size() - std::string(
"-modified").size());
// No merged metadata yet: remember this release as the reference.
198 if (!outputMetaData) {
201 outputRelease = release;
// Compare this file against the merged metadata collected so far.
204 if(release != outputRelease) {
205 B2ERROR(
"Release in " << std::quoted(input) <<
" differs from previous files: " <<
206 fileMetaData.getRelease() <<
" != " << outputMetaData->getRelease());
208 if(fileMetaData.getSteering() != outputMetaData->getSteering()){
210 B2ERROR(
"Steering file for " << std::quoted(input) <<
" differs from previous files.");
212 if(fileMetaData.getDatabaseGlobalTag() != outputMetaData->getDatabaseGlobalTag()){
// Tags that are equal once the legacy entry is stripped are accepted; the
// stripped form is then stored in the merged metadata.
217 if(removeLegacyGt(fileMetaData.getDatabaseGlobalTag()) == removeLegacyGt(outputMetaData->getDatabaseGlobalTag())) {
218 outputMetaData->setDatabaseGlobalTag(removeLegacyGt(outputMetaData->getDatabaseGlobalTag()));
220 B2ERROR(
"Database globalTag in " << std::quoted(input) <<
" differs from previous files: " <<
221 fileMetaData.getDatabaseGlobalTag() <<
" != " << outputMetaData->getDatabaseGlobalTag());
224 if(fileMetaData.getDataDescription() != outputMetaData->getDataDescription()){
// NOTE(review): cur is filled from the merged metadata and prev from the
// current file, which looks swapped relative to the names - confirm.
226 for (
const auto& descrPair : outputMetaData->getDataDescription())
227 cur.put(descrPair.first, descrPair.second);
229 for (
const auto& descrPair : fileMetaData.getDataDescription())
230 prev.put(descrPair.first, descrPair.second);
232 B2ERROR(
"dataDescription in " << std::quoted(input) <<
" differs from previous files:\n" << cur.string() <<
" vs.\n" << prev.string());
234 if(fileMetaData.isMC() != outputMetaData->isMC()){
235 B2ERROR(
"Type (real/MC) for " << std::quoted(input) <<
" differs from previous files.");
// Add the event counters of this file to the merged totals.
238 outputMetaData->setMcEvents(outputMetaData->getMcEvents() + fileMetaData.getMcEvents());
239 outputMetaData->setNEvents(outputMetaData->getNEvents() + fileMetaData.getNEvents());
241 if(fileMetaData.getNEvents() < 1) {
242 B2WARNING(
"File " << std::quoted(input) <<
" is empty.");
// Widen the overall (experiment, run, event) range with the range of this file.
245 EventInfo curLowEvt = EventInfo{fileMetaData.getExperimentLow(), fileMetaData.getRunLow(), fileMetaData.getEventLow()};
246 EventInfo curHighEvt = EventInfo{fileMetaData.getExperimentHigh(), fileMetaData.getRunHigh(), fileMetaData.getEventHigh()};
247 if(!lowEvt or curLowEvt < *lowEvt) lowEvt = curLowEvt;
248 if(!highEvt or curHighEvt > *highEvt) highEvt = curHighEvt;
// Record the random seed; the insert result tells whether it was seen before.
// NOTE(review): the warning text reads "more then one"; it is runtime output
// and therefore left unchanged here.
251 auto it = allSeeds.insert(fileMetaData.getRandomSeed());
253 B2WARNING(
"Duplicate Random Seed: " << std::quoted(fileMetaData.getRandomSeed()) <<
" present in more then one file");
255 allUsers.insert(fileMetaData.getUser());
// Collect all parent entries of this file.
257 for (
int i = 0; i < fileMetaData.getNParents(); ++i) {
258 allParents.insert(fileMetaData.getParent(i));
260 }
// Exceptions raised while inspecting a file are reported together with its name.
catch(std::exception &e) {
261 B2ERROR(
"input file " << std::quoted(input) <<
": " << e.what());
// Every mergeable object has to be present in each of the input files.
266 for(
const auto &val: persistentMergeables){
267 if(val.second.second != inputfilenames.size()){
268 B2ERROR(
"Mergeable " << std::quoted(val.first) <<
" only present in " << val.second.second <<
" out of "
269 << inputfilenames.size() <<
" files");
274 if(allUsers.size()>1) {
275 B2WARNING(
"Multiple different users created input files: " << boost::algorithm::join(allUsers,
", "));
287 B2FATAL(
"For some reason no files could be processed");
291 B2WARNING(
"All Files were empty");
292 lowEvt = EventInfo{-1, -1, 0};
293 highEvt = EventInfo{-1, -1, 0};
// Finalise the merged metadata: empty LFN for now, parents and event range.
297 outputMetaData->setLfn(
"");
298 outputMetaData->setParents(std::vector<std::string>(allParents.begin(), allParents.end()));
299 outputMetaData->setLow(std::get<0>(*lowEvt), std::get<1>(*lowEvt), std::get<2>(*lowEvt));
300 outputMetaData->setHigh(std::get<0>(*highEvt), std::get<1>(*highEvt), std::get<2>(*highEvt));
// More than one input file: store an empty random seed in the merged metadata.
302 if(inputfilenames.size()>1){
303 outputMetaData->setRandomSeed(
"");
// Second pass: create the output file and copy the events of all inputs.
309 TFile output(outputfilename.c_str(),
"RECREATE");
310 if (output.IsZombie()) {
311 B2ERROR(
"Could not create output file " << std::quoted(outputfilename));
315 TTree* outputEventTree{
nullptr};
316 for (
const auto& input : inputfilenames) {
317 B2INFO(
"processing events from " << std::quoted(input));
318 TFile tfile(input.c_str());
// NOTE(review): tree is used below without a null check visible in this
// listing; a missing or differently typed "tree" object would give a null
// pointer here - confirm.
319 auto* tree =
dynamic_cast<TTree*
>(tfile.Get(
"tree"));
// The first input provides the structure of the output tree via an empty clone.
320 if(!outputEventTree){
322 outputEventTree = tree->CloneTree(0);
324 outputEventTree->CopyAddresses(tree);
// Copy all entries (-1) of this input into the output tree.
329 outputEventTree->CopyEntries(tree, -1,
"fast SortBasketsByEntry BuildIndexOnError");
331 outputEventTree->CopyAddresses(tree,
true);
337 if(!outputEventTree->GetTreeIndex()) {
338 B2INFO(
"No Index found: building new index");
343 outputEventTree->Write();
344 B2INFO(
"Done processing events");
// Replace the empty LFN with the absolute path of the output file.
347 outputMetaData->setLfn(fs::absolute(outputfilename, fs::initial_path()).
string());
// --add-to-catalog given (help text: register the output file in the file catalog).
349 if(variables.count(
"add-to-catalog")>0) {
352 B2INFO(
"Writing FileMetaData");
// One-entry "persistent" tree with the merged metadata and one branch per mergeable.
355 TTree outputMetaDataTree(
"persistent",
"persistent");
356 outputMetaDataTree.Branch(
"FileMetaData", &outputMetaData);
357 for(
auto &it: persistentMergeables){
358 outputMetaDataTree.Branch(it.first.c_str(), &it.second.first);
360 outputMetaDataTree.Fill();
361 outputMetaDataTree.Write();
// Free the merged objects and the metadata object.
364 for(
const auto& val: persistentMergeables){
365 delete val.second.first;
367 persistentMergeables.clear();
368 delete outputMetaData;
static FileCatalog & Instance()
Static method to get a reference to the FileCatalog instance.
virtual bool registerFile(const std::string &fileName, FileMetaData &metaData, const std::string &oldLFN="")
Register a file in the (local) file catalog.
Create human-readable or JSON output for key value pairs.
@ c_Error
Error: for things that went wrong and have to be fixed.
@ c_Info
Info: for informational messages, e.g.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
@ c_Warning
Warning: for potential problems that the user should pay attention to.
@ c_Level
Log level of the message.
@ c_Message
Log message text.
void setLogInfo(ELogLevel logLevel, unsigned int logInfo)
Configure the printed log information for the given level.
LogConfig * getLogConfig()
Returns global log system configuration.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Abstract base class for objects that can be merged.
Helper class to factorize some necessary tasks when working with Belle2 output files.
void setCreationData(FileMetaData &metadata)
Fill the creation info of a file meta data: site, user, data.
void buildIndex(TTree *tree)
Build TTreeIndex on tree (assumes EventMetaData branch exists there).
Abstract base class for different kinds of events.
int main(int argc, char **argv)
Run all tests.