Belle II Software development
merge_files.py
1#!/usr/bin/env python3
2
3
10
11import os
12import sys
13import re
14import subprocess
15import itertools
16from shutil import copyfile
17import ROOT
18from ROOT.Belle2 import FileMetaData
# importing basf2 fixes the print buffering problem; it is also used below for
# get_file_metadata() and find_file()
20import basf2
21from b2test_utils import clean_working_directory, skip_test_if_light
22
23
def create_testfile(name, exp=0, run=0, events=100, branchNames=None, **argk):
    """Produce the file ``name`` by running basf2 on the test steering string.

    The module-level ``testfile_steering`` text is written to
    ``steering-<name>.py`` in the current directory and handed to ``basf2``.

    Parameters:
      name: name of the output file (passed to ``basf2 -o``)
      exp: experiment number (``--experiment``)
      run: run number (``--run``)
      events: number of events (``-n``)
      branchNames: optional list of extra command line arguments appended
        after the steering file; the steering passes them on as branch names
      argk: additional keyword arguments are added to the environment of the
        basf2 process
    """
    # environment of the child: ours plus all extra keyword arguments
    child_env = {**os.environ, **argk}
    extra_args = [] if branchNames is None else branchNames

    steering_path = f"steering-{name}.py"
    with open(steering_path, "w") as steering:
        steering.write(testfile_steering)

    command = [
        "basf2",
        "-o", name,
        "--experiment", str(exp),
        "--run", str(run),
        "-n", str(events),
        steering_path,
    ] + extra_args
    # the exit code of basf2 is deliberately not inspected here
    subprocess.call(command, env=child_env)
52
53
def create_testfile_direct(
    name,
    metadata=None,
    release="test_release",
    user="test_user",
    seed=None,
    site="test_site",
    global_tag="test_globaltag",
    steering="test_steering",
):
    """Write a minimal test file by hand, without running basf2.

    Similar to create_testfile but gives full control over the FileMetaData.
    The file contains a ``persistent`` tree with one ``FileMetaData`` entry and
    a ``tree`` tree with one default constructed ``EventMetaData`` entry.

    Parameters:
      name: name of the file to create, also used as LFN
      metadata: FileMetaData object to fill and store; a new one is created
        if None. Seed, LFN, creation data, global tag and steering of a passed
        object are always overwritten with the values given here.
      release: release name stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store, defaults to ``name + "-seed"``
      site: site name stored in the creation data
      global_tag: database global tag to store
      steering: steering file content to store
    """
    if metadata is None:
        metadata = FileMetaData()
    if seed is None:
        seed = name + "-seed"

    # seed is guaranteed to be set at this point so it is written
    # unconditionally (the former "is not None" test could never fail)
    metadata.setRandomSeed(seed)
    metadata.setLfn(name)
    metadata.setCreationData(
        "the most auspicious of days for testing", site, user, release
    )
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    root_file = ROOT.TFile(name, "RECREATE")
    # persistent tree holding exactly one FileMetaData entry
    tree = ROOT.TTree("persistent", "persistent")
    tree.Branch("FileMetaData", metadata)
    tree.Fill()
    tree.Write()
    # event tree holding exactly one default EventMetaData entry
    tree = ROOT.TTree("tree", "tree")
    event_meta = ROOT.Belle2.EventMetaData()
    tree.Branch("EventMetaData", event_meta)
    tree.Fill()
    tree.Write()
    root_file.Close()
90
91
def create_testfile_ntuple(input, output, treeNames=None, **argk):
    """Create a test ntuple file by running basf2 on the ntuple steering string.

    The module-level ``testfile_ntuple_steering`` text is written to
    ``steering-ntuple.py`` and run with ``basf2 -i input -o output``.
    Afterwards the release stored in the FileMetaData of ``output`` is replaced
    by ``"test-release"`` and a ``persistent`` tree with that metadata is
    written to the file.

    Parameters:
      input: name of the input file (the name shadows the builtin but is kept
        because callers pass it by keyword)
      output: name of the ntuple file to create
      treeNames: iterable with the tree names handed to the steering file as
        command line arguments, defaults to ``["tree", "anotherTree"]``
      argk: additional keyword arguments are added to the environment of the
        basf2 process
    """
    # use a None sentinel instead of a mutable list as default argument
    if treeNames is None:
        treeNames = ["tree", "anotherTree"]
    env = dict(os.environ)
    env.update(argk)

    steering_file = "steering-ntuple.py"
    with open(steering_file, "w") as f:
        f.write(testfile_ntuple_steering)

    subprocess.call(
        ["basf2", "-i", input, "-o", output, steering_file] + list(treeNames),
        env=env,
    )

    # update release in metadata to avoid 'modified-xxx' warnings
    metadata = basf2.get_file_metadata(output)
    metadata.setCreationData(
        metadata.getDate(), metadata.getSite(), metadata.getUser(), "test-release"
    )
    f = ROOT.TFile(output, "UPDATE")
    t = ROOT.TTree("persistent", "persistent")
    t.Branch("FileMetaData", metadata)
    t.Fill()
    t.Write()
    f.Close()
117
118
def merge_files(*args, output="output.root", filter_modified=False):
    """Call ``b2file-merge -q`` with the given output and arguments.

    The standard output of the tool is captured, optionally filtered and then
    forwarded to our own standard output.

    Parameters:
      args: input files (and extra options) appended after the output name
      output: name of the output file
      filter_modified: if True omit warnings that the release is modified and
        consistency cannot be checked

    Returns:
      the exit code of b2file-merge
    """
    command = ["b2file-merge", "-q", output, *args]
    result = subprocess.run(command, stdout=subprocess.PIPE)
    captured = result.stdout

    if filter_modified:
        # drop every complete "modified software" warning line
        modified_warning = re.compile(
            rb"^\[WARNING\] File \"(.*?)\" created with modified software ([a-zA-Z0-9\-+]*?): "
            rb"cannot verify that files are compatible\n",
            re.MULTILINE,
        )
        captured = modified_warning.sub(b"", captured)

    # forward the (possibly filtered) output as raw bytes
    stream = sys.stdout.buffer
    stream.write(captured)
    stream.flush()
    return result.returncode
146
147
148
## Minimal steering file to create input files we can merge.
# create_testfile() writes this text to disk and runs it with basf2. The
# steering honours the BELLE2_GLOBALTAG and BELLE2_SEED environment variables
# and passes all command line arguments to RootOutput as branch names.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
    basf2.conditions.override_globaltags([os.environ["BELLE2_GLOBALTAG"]])
if "BELLE2_SEED" in os.environ:
    basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
164
165
## Minimal steering file to create output ntuples we can merge.
# create_testfile_ntuple() writes this text to disk and runs it with basf2.
# It expects two command line arguments which are used as the tree names of
# the two VariablesToNtuple modules.
testfile_ntuple_steering = """
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
main = basf2.create_path()
main.add_module('RootInput')
main.add_module('VariablesToNtuple',
                treeName=sys.argv[1]
                )
main.add_module('VariablesToNtuple',
                treeName=sys.argv[2]
                )
basf2.process(main)
"""
181
182
def check_01_existing():
    """Check that merging a non existing file fails"""
    create_testfile_direct("test2.root")
    # a missing input has to be rejected ...
    if merge_files("/test1.root") == 0:
        return False
    # ... while the file we just created has to be accepted
    return merge_files("test2.root") == 0
187
188
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # plain text file which only carries the .root extension
    with open("test1.root", "w") as fake_root:
        fake_root.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
194
195
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # put something in place of the default output file
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    # without -f the existing output must not be replaced
    if merge_files("test1.root") == 0:
        return False
    # with -f the merge has to succeed
    return merge_files("-f", "test1.root") == 0
202
203
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # output location inside a directory which is not expected to exist
    exit_code = merge_files("test1.root", output="/nosuchdir/foo")
    return exit_code != 0
208
209
def check_05_release():
    """Check that it fails if the releases are different"""
    # first file uses the default release, second one a different release
    inputs = {"test1.root": {}, "test2.root": {"release": "other_release"}}
    for filename, overrides in inputs.items():
        create_testfile_direct(filename, **overrides)
    exit_code = merge_files(*inputs)
    return exit_code != 0
215
216
def check_06_empty_release():
    """Check that merging fails with empty release values"""
    create_testfile_direct("test1.root")
    # second file stores an empty release string
    create_testfile_direct("test2.root", release="")
    merge_failed = merge_files("test1.root", "test2.root") != 0
    return merge_failed
222
223
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    # same release name, once plain and once with the "-modified" suffix
    for filename, release in (
        ("test1.root", "test_release"),
        ("test2.root", "test_release-modified"),
    ):
        create_testfile_direct(filename, release=release)
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
229
230
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    filenames = ["test1.root", "test2.root"]
    # both files are created with the very same seed
    for filename in filenames:
        create_testfile_direct(filename, seed="seed1")
    return merge_files(*filenames) == 0
236
237
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    create_testfile_direct("test1.root")
    # second file stores a steering content different from the default
    create_testfile_direct("test2.root", steering="my other steering")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
243
244
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    # default global tag for the first file, another one for the second
    inputs = {"test1.root": {}, "test2.root": {"global_tag": "other_globaltag"}}
    for filename, overrides in inputs.items():
        create_testfile_direct(filename, **overrides)
    merge_failed = merge_files(*inputs) != 0
    return merge_failed
250
251
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # first file with the default branches ...
    create_testfile("test1.root")
    # ... second file restricted to the EventMetaData branch
    create_testfile("test2.root", branchNames=["EventMetaData"])
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
257
258
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # combine the file with itself using plain hadd
    hadd_command = ["hadd", "test11.root", "test1.root", "test1.root"]
    subprocess.call(hadd_command)
    exit_code = merge_files("test11.root")
    return exit_code != 0
264
265
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # file which only contains an (empty) event tree
    root_file = ROOT.TFile("test1.root", "RECREATE")
    event_tree = ROOT.TTree("tree", "tree")
    event_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
273
274
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # file which only contains the persistent tree with default metadata
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    persistent.Branch("FileMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
285
286
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # NOTE(review): the docstring is printed by the test runner and equals the
    # one of check_14; here the event tree exists but has no branches.
    root_file = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_meta)
    tree.Fill()
    tree.Write()
    # event tree written without any branch
    tree = ROOT.TTree("tree", "tree")
    tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
300
301
def check_16_nonmergeable():
    """Check that merging fails if there are multiple mergeable persistent trees"""
    root_file = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    # the persistent tree gets a second branch next to FileMetaData
    tree.Branch("FileMetaData", file_meta)
    tree.Branch("AnotherMetaData", file_meta)
    tree.Fill()
    tree.Write()
    # NOTE(review): the event tree branch is also filled from the FileMetaData
    # object, presumably only the branch name matters here - confirm
    tree = ROOT.TTree("tree", "tree")
    tree.Branch("EventMetaData", file_meta)
    tree.Fill()
    tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
318
319
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    parents = [("a", "b", "c"), ("a", "c", "d")]
    filenames = []
    for number, parent_lfns in enumerate(parents, start=1):
        file_meta = FileMetaData()
        # the parents have to be handed over as std::vector<std::string>
        lfn_vector = ROOT.std.vector("std::string")()
        for lfn in parent_lfns:
            lfn_vector.push_back(lfn)
        file_meta.setParents(lfn_vector)
        file_meta.setRandomSeed(str(number))
        filenames.append(f"test{number}.root")
        create_testfile_direct(filenames[-1], file_meta)

    merge_files(*filenames)
    merged = basf2.get_file_metadata("output.root")
    # every parent is expected exactly once and in sorted order
    should_be = sorted(set(parents[0]) | set(parents[1]))
    is_actual = []
    for index in range(merged.getNParents()):
        is_actual.append(merged.getParent(index))
    return should_be == is_actual
342
343
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    n_events = [10, 1243, 232, 1272, 25]
    # one full event less than events in every file
    n_full_events = [n - 1 for n in n_events]
    n_mc_events = [120, 821, 23, 923, 1]

    filenames = []
    for index, counts in enumerate(zip(n_events, n_full_events, n_mc_events)):
        events, full_events, mc_events = counts
        file_meta = FileMetaData()
        file_meta.setNEvents(events)
        file_meta.setNFullEvents(full_events)
        file_meta.setMcEvents(mc_events)
        file_meta.setRandomSeed(str(index))
        filenames.append(f"test{index}.root")
        create_testfile_direct(filenames[-1], file_meta)

    merge_files(*filenames)
    merged = basf2.get_file_metadata("output.root")
    # each counter of the merged file has to be the sum over all inputs
    if sum(n_events) != merged.getNEvents():
        return False
    if sum(n_full_events) != merged.getNFullEvents():
        return False
    return sum(n_mc_events) == merged.getMcEvents()
365
366
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # (experiment, run, event) used as low and high of the file; this value
    # marks a file without events
    no_events = (-1, -1, 0)
    lowhigh = [
        no_events,
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    files = []
    for number, boundary in enumerate(lowhigh):
        event_count = 0 if boundary == no_events else 1
        file_meta = FileMetaData()
        file_meta.setNEvents(event_count)
        file_meta.setNFullEvents(event_count)
        file_meta.setRandomSeed(str(number))
        file_meta.setLow(*boundary)
        file_meta.setHigh(*boundary)
        files.append(f"test{number}.root")
        create_testfile_direct(files[-1], file_meta)

    # test all possible combinations taking 2 elements from the list plus the
    # full list in one go
    every_index = range(len(files))
    selections = [*itertools.permutations(every_index, 2), every_index]
    for selection in selections:
        # files without events do not contribute to the expected boundaries
        filled = [lowhigh[i] for i in selection if lowhigh[i] != no_events]
        low = min(filled)
        high = max(filled)
        exit_code = merge_files("-f", "--no-catalog", *(files[i] for i in selection))
        if exit_code != 0:
            return False
        meta = basf2.get_file_metadata("output.root")
        low_matches = (
            meta.getExperimentLow() == low[0]
            and meta.getRunLow() == low[1]
            and meta.getEventLow() == low[2]
        )
        if not low_matches:
            print("low event should be", low)
            meta.Print()
            return False
        high_matches = (
            meta.getExperimentHigh() == high[0]
            and meta.getRunHigh() == high[1]
            and meta.getEventHigh() == high[2]
        )
        if not high_matches:
            print("high event should be", high)
            meta.Print()
            return False
    return True
415
416
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    # 1111 + 123 events add up to the 1234 events checked below
    for filename, n_events in (("test1.root", 1111), ("test2.root", 123)):
        create_testfile(filename, events=n_events)
    merge_files("test1.root", "test2.root", filter_modified=True)
    check_command = [
        "b2file-check",
        "-n", "1234",
        "--mcevents", "1234",
        "output.root",
        "EventMetaData",
        "MCParticles",
    ]
    exit_code = subprocess.call(check_command)
    return exit_code == 0
437
438
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    # two files with 100 events each, one for run 0 and one for run 1
    for filename, run_number, seed, user in (
        ("test1.root", 0, "test1", "user1"),
        ("test2.root", 1, "test2", "user2"),
    ):
        create_testfile(
            filename, run=run_number, events=100, BELLE2_SEED=seed, BELLE2_USER=user
        )
    # the first file is deliberately merged twice
    merge_files("test1.root", "test2.root", "test1.root", filter_modified=True)
    merged_file = ROOT.TFile("output.root")
    tree = merged_file.Get("tree")
    n_entries = tree.GetEntriesFast()
    if n_entries != 300:
        return False
    # we expect to see the events from run 0 twice and the ones from run 1 once.
    # So create a dictionary which contains the expected counts
    remaining = {
        (0, run_number, event): copies
        for run_number, copies in ((0, 2), (1, 1))
        for event in range(1, 101)
    }
    # tick off every event we find, all counts have to end up at zero
    for entry in range(n_entries):
        tree.GetEntry(entry)
        info = tree.EventMetaData
        remaining[(info.getExperiment(), info.getRun(), info.getEvent())] -= 1
    return all(count == 0 for count in remaining.values())
462
463
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # second input is a copy of the fake_real.root file shipped with the tests
    real_data_file = basf2.find_file("framework/tests/fake_real.root")
    copyfile(real_data_file, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
469
470
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # second file has Legacy_IP_Information appended to the global tags
    global_tags = {
        "test1.root": "test_globaltag",
        "test2.root": "test_globaltag,Legacy_IP_Information",
    }
    for filename, tags in global_tags.items():
        create_testfile_direct(filename, global_tag=tags)
    if merge_files(*global_tags) == 0:
        merged = basf2.get_file_metadata("output.root")
        return merged.getDatabaseGlobalTag() == "test_globaltag"
    return False
481
482
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # second file has Legacy_IP_Information between two other global tags
    global_tags = {
        "test1.root": "test_globaltag,other",
        "test2.root": "test_globaltag,Legacy_IP_Information,other",
    }
    for filename, tags in global_tags.items():
        create_testfile_direct(filename, global_tag=tags)
    if merge_files(*global_tags) == 0:
        merged = basf2.get_file_metadata("output.root")
        return merged.getDatabaseGlobalTag() == "test_globaltag,other"
    return False
493
494
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # no global tag at all versus only Legacy_IP_Information
    global_tags = {"test1.root": "", "test2.root": "Legacy_IP_Information"}
    for filename, tags in global_tags.items():
        create_testfile_direct(filename, global_tag=tags)
    if merge_files(*global_tags) == 0:
        merged = basf2.get_file_metadata("output.root")
        return merged.getDatabaseGlobalTag() == ""
    return False
503
504
def check_26_ntuple_merge():
    """Check that we can merge two ntuple output files"""
    # first create both input files ...
    for number, n_events in ((1, 111), (2, 123)):
        create_testfile(f"test{number}.root", exp=1, run=2, events=n_events)
    # ... then turn each of them into an ntuple file
    ntuples = []
    for number in (1, 2):
        ntuples.append(f"ntuple{number}.root")
        create_testfile_ntuple(input=f"test{number}.root", output=ntuples[-1])
    return merge_files(*ntuples) == 0
512
513
def check_27_ntuple_trees():
    """Check that ntuple merge fails if the tree names are different"""
    for filename in ("test1.root", "test2.root"):
        create_testfile(filename)
    # first ntuple file with the default tree names ...
    create_testfile_ntuple(input="test1.root", output="ntuple1.root")
    # ... second one with a different set of tree names
    other_trees = ["differentTree", "tree"]
    create_testfile_ntuple(
        input="test2.root", output="ntuple2.root", treeNames=other_trees
    )
    exit_code = merge_files("ntuple1.root", "ntuple2.root")
    return exit_code != 0
523
524
def check_28_streaming():
    """Check if we can merge streamed input files"""
    # Here we use as input a mdst file from GitHub
    remote_mdst = 'https://github.com/belle2/basf2/raw/refs/heads/main/mdst/tests/mdst-v09-00-00.root'
    exit_code = merge_files(remote_mdst)
    return exit_code == 0
530
531
def check_29_parent_release():
    """Check that merging files does not modify the release version in the metadata."""
    inputs = ["test1.root", "test2.root"]
    # both inputs carry the same release name
    for filename in inputs:
        create_testfile_direct(filename, release="abcd")
    merge_files(*inputs, output="output.root")
    merged = basf2.get_file_metadata("output.root")
    return merged.getRelease() == "abcd"
539
540
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't affect b2file-merge and adapt the correct version number here."""
    # class version of FileMetaData this test was written against
    expected_version = 11
    current_version = FileMetaData.Class().GetClassVersion()
    return current_version == expected_version
545
546
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    failures = 0
    # all module level names starting with "check_", ordered by name
    existing = [
        (key, value)
        for key, value in sorted(globals().items())
        if key.startswith("check_")
    ]
    for name, fcn in existing:
        print(f"running {name}: {fcn.__doc__}")
        # every check runs in its own clean working directory
        with clean_working_directory():
            if fcn():
                print(f"{name} passed")
            else:
                print(f"{name} failed")
                failures += 1

    # the number of failed checks is used as exit code
    sys.exit(failures)
561