Belle II Software light-2411-aldebaran
merge_files.py
1#!/usr/bin/env python3
2
3
10
11import os
12import sys
13import re
14import subprocess
15import itertools
16from shutil import copyfile
17import ROOT
18from ROOT.Belle2 import FileMetaData
19# we don't really need basf2 but it fixes the print buffering problem
20import basf2
21from b2test_utils import clean_working_directory, skip_test_if_light
22
23
def create_testfile(name, exp=0, run=0, events=100, branchNames=None, **argk):
    """Produce a basf2 output file by running the generic test steering.

    The module level ``testfile_steering`` text is written to
    ``steering-<name>.py`` in the current directory and then executed with
    the ``basf2`` executable. The exit code of basf2 is not inspected.

    Parameters:
      name: name of the output file, handed to basf2 via ``-o``
      exp: experiment number, handed to basf2 via ``--experiment``
      run: run number, handed to basf2 via ``--run``
      events: number of events, handed to basf2 via ``-n``
      branchNames: optional list of strings appended to the command line
          after the steering file
      argk: extra environment variables to set for the basf2 process
    """
    extra_arguments = [] if branchNames is None else branchNames
    # the child gets a copy of our environment plus the requested overrides
    child_env = {**os.environ, **argk}

    script_name = f"steering-{name}.py"
    with open(script_name, "w") as script:
        script.write(testfile_steering)

    command = ["basf2", "-o", name]
    command += ["--experiment", str(exp)]
    command += ["--run", str(run)]
    command += ["-n", str(events)]
    command.append(script_name)
    subprocess.call(command + extra_arguments, env=child_env)
52
53
def create_testfile_direct(
    name,
    metadata=None,
    release="test_release",
    user="test_user",
    seed=None,
    site="test_site",
    global_tag="test_globaltag",
    steering="test_steering",
):
    """Write a minimal ROOT file by hand, without running basf2.

    Similar to create_testfile but gives full control over the FileMetaData.
    The file gets a ``persistent`` tree with one ``FileMetaData`` entry and a
    ``tree`` tree with one default constructed ``EventMetaData`` entry.

    Parameters:
      name: name of the file to create, also used as LFN
      metadata: FileMetaData object to fill and store. It is modified in
          place; a fresh object is created if None
      release: release name stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store, defaults to ``name + "-seed"``
      site: site name stored in the creation data
      global_tag: database globaltag to store
      steering: steering file content to store
    """
    if metadata is None:
        metadata = FileMetaData()
    if seed is None:
        seed = name + "-seed"

    # seed is guaranteed to be set at this point so it is always stored
    metadata.setRandomSeed(seed)
    metadata.setLfn(name)
    metadata.setCreationData(
        "the most auspicious of days for testing", site, user, release
    )
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    f = ROOT.TFile(name, "RECREATE")
    # persistent tree: a single entry holding the file level metadata
    t = ROOT.TTree("persistent", "persistent")
    t.Branch("FileMetaData", metadata)
    t.Fill()
    t.Write()
    # event tree: a single entry with a default constructed EventMetaData
    t = ROOT.TTree("tree", "tree")
    event_meta = ROOT.Belle2.EventMetaData()
    t.Branch("EventMetaData", event_meta)
    t.Fill()
    t.Write()
    f.Close()
90
91
def create_testfile_ntuple(input, output, treeNames=None, **argk):
    """Create a test ntuple file by running the ntuple test steering.

    The module level ``testfile_ntuple_steering`` text is written to
    ``steering-ntuple.py`` and executed with ``basf2``. Afterwards the release
    stored in the FileMetaData of the output file is replaced by
    ``"test-release"``.

    Parameters:
      input: name of the input file, handed to basf2 via ``-i``
      output: name of the ntuple file, handed to basf2 via ``-o``
      treeNames: tree names appended to the command line after the steering
          file. The steering reads ``sys.argv[1]`` and ``sys.argv[2]`` so two
          names are needed. Defaults to ``["tree", "anotherTree"]``
      argk: extra environment variables to set for the basf2 process
    """
    # None instead of a list literal as default: avoids sharing one mutable
    # default object between calls (same convention as create_testfile)
    if treeNames is None:
        treeNames = ["tree", "anotherTree"]
    env = dict(os.environ)
    env.update(argk)

    steering_file = "steering-ntuple.py"
    with open(steering_file, "w") as f:
        f.write(testfile_ntuple_steering)

    subprocess.call(
        ["basf2", "-i", input, "-o", output, steering_file] + list(treeNames),
        env=env,
    )

    # update release in metadata to avoid 'modified-xxx' warnings
    metadata = get_metadata(output)
    metadata.setCreationData(
        metadata.getDate(), metadata.getSite(), metadata.getUser(), "test-release"
    )
    f = ROOT.TFile(output, "UPDATE")
    t = ROOT.TTree("persistent", "persistent")
    t.Branch("FileMetaData", metadata)
    t.Fill()
    t.Write()
    f.Close()
117
118
def get_metadata(name="output.root"):
    """Get the metadata out of a root file

    Parameters:
      name: name of the ROOT file to read

    Returns:
      a FileMetaData object constructed from the first entry of the
      ``FileMetaData`` branch in the ``persistent`` tree
    """
    out = ROOT.TFile(name)
    try:
        t = out.Get("persistent")
        t.GetEntry(0)
        # construct a new object from the branch content before the file is
        # closed, the caller never sees the tree itself
        return FileMetaData(t.FileMetaData)
    finally:
        # close explicitly instead of relying on garbage collection
        out.Close()
125
126
def merge_files(*args, output="output.root", filter_modified=False):
    """Call ``b2file-merge -q`` and relay what it wrote to stdout

    Parameters:
      args: all further command line arguments (input files and options),
          placed after the output file name
      output: name of the output file
      filter_modified: if True omit warnings that the release is modified and
          consistency cannot be checked

    Returns:
      the exit code of ``b2file-merge``
    """
    command = ["b2file-merge", "-q", output, *args]
    result = subprocess.run(command, stdout=subprocess.PIPE)
    captured = result.stdout

    if filter_modified:
        # drop every line warning about a modified release
        modified_warning = re.compile(
            rb"^\[WARNING\] File \"(.*?)\" created with modified software ([a-zA-Z0-9\-+]*?): "
            rb"cannot verify that files are compatible\n",
            re.MULTILINE,
        )
        captured = modified_warning.sub(b"", captured)

    # pass the (possibly filtered) bytes on to our own stdout in any case
    raw_stdout = sys.stdout.buffer
    raw_stdout.write(captured)
    raw_stdout.flush()
    return result.returncode
154
155
156
# Minimal steering file to create event files we can merge: it runs
# EventInfoSetter, ParticleGun and RootOutput. The globaltag and the random
# seed can be overridden with the BELLE2_GLOBALTAG and BELLE2_SEED environment
# variables and all command line arguments are passed to RootOutput as
# branchNames.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
    basf2.conditions.override_globaltags([os.environ["BELLE2_GLOBALTAG"]])
if "BELLE2_SEED" in os.environ:
    basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
172
173
## Minimal steering file to create output ntuples we can merge.
# It reads events with RootInput and adds two VariablesToNtuple modules whose
# tree names are taken from the first and second command line argument.
testfile_ntuple_steering = """
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
main = basf2.create_path()
main.add_module('RootInput')
main.add_module('VariablesToNtuple',
                treeName=sys.argv[1]
                )
main.add_module('VariablesToNtuple',
                treeName=sys.argv[2]
                )
basf2.process(main)
"""
189
190
def check_01_existing():
    """Check that merging a non exsiting file fails"""
    # NOTE: the docstring is printed by the test driver, so it is kept verbatim
    create_testfile_direct("test2.root")
    # "/test1.root" is never created, so the merge has to report an error ...
    if merge_files("/test1.root") == 0:
        return False
    # ... while the file we just wrote has to merge cleanly
    return merge_files("test2.root") == 0
195
196
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # a plain text file which merely carries the .root extension
    with open("test1.root", "w") as fake_root:
        fake_root.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
202
203
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # put something in the place of the default output file
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    # without -f the existing output must not be replaced ...
    if merge_files("test1.root") == 0:
        return False
    # ... with -f the merge has to succeed
    return merge_files("-f", "test1.root") == 0
210
211
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # output location inside a directory that is expected not to exist
    exit_code = merge_files("test1.root", output="/nosuchdir/foo")
    return exit_code != 0
216
217
def check_05_release():
    """Check that it fails if the releases are different"""
    # first file uses the default release, second one a different name
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="other_release")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
223
224
def check_06_empty_release():
    """Check that merging fails with empty release valuses"""
    # NOTE: the docstring is printed by the test driver, so it is kept verbatim
    create_testfile_direct("test1.root")
    # second file carries an empty release string
    create_testfile_direct("test2.root", release="")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
230
231
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    # same release name, once plain and once with a "-modified" suffix
    create_testfile_direct("test1.root", release="test_release")
    create_testfile_direct("test2.root", release="test_release-modified")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
237
238
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    # both inputs get the very same random seed
    for filename in ("test1.root", "test2.root"):
        create_testfile_direct(filename, seed="seed1")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
244
245
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    create_testfile_direct("test1.root")
    # second file stores a steering text differing from the default
    create_testfile_direct("test2.root", steering="my other steering")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
251
252
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    create_testfile_direct("test1.root")
    # second file stores a globaltag differing from the default
    create_testfile_direct("test2.root", global_tag="other_globaltag")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
258
259
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # first file: default branch selection
    create_testfile("test1.root")
    # second file: restrict the output to the EventMetaData branch
    create_testfile("test2.root", branchNames=["EventMetaData"])
    # the files come from a real basf2 run, so hide modified release warnings
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
265
266
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # let ROOT's hadd tool combine the file with itself into test11.root
    hadd_command = ["hadd", "test11.root", "test1.root", "test1.root"]
    subprocess.call(hadd_command)
    exit_code = merge_files("test11.root")
    return exit_code != 0
272
273
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # file which only contains an empty event tree and nothing else
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    events = ROOT.TTree("tree", "tree")
    events.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
281
282
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # file which only contains the persistent tree with default metadata
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    persistent.Branch("FileMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
293
294
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # NOTE: the docstring is printed by the test driver, so it is kept
    # verbatim. Unlike check_14 the event tree does exist here, it just has no
    # branches.
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    # persistent tree with creation data filled
    tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_meta)
    tree.Fill()
    tree.Write()
    # event tree without any branch or entry
    tree = ROOT.TTree("tree", "tree")
    tree.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
308
309
def check_16_nonmergeable():
    """Check that merging fails if there are mutiple mergeable persistent trees"""
    # NOTE: the docstring is printed by the test driver, so it is kept verbatim
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    # persistent tree with a second branch next to the FileMetaData one
    tree = ROOT.TTree("persistent", "persistent")
    for branch_name in ("FileMetaData", "AnotherMetaData"):
        tree.Branch(branch_name, file_meta)
    tree.Fill()
    tree.Write()
    # event tree, its EventMetaData branch is filled from the same object
    tree = ROOT.TTree("tree", "tree")
    tree.Branch("EventMetaData", file_meta)
    tree.Fill()
    tree.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
326
327
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    # parents of the two input files, "a" and "c" appear in both
    parents = [("a", "b", "c"), ("a", "c", "d")]
    m1 = FileMetaData()
    m2 = FileMetaData()
    lfn1 = ROOT.std.vector("std::string")()
    lfn2 = ROOT.std.vector("std::string")()
    for e in parents[0]:
        lfn1.push_back(e)
    for e in parents[1]:
        lfn2.push_back(e)
    m1.setParents(lfn1)
    m2.setParents(lfn2)
    m1.setRandomSeed("1")
    m2.setRandomSeed("2")
    create_testfile_direct("test1.root", m1)
    create_testfile_direct("test2.root", m2)
    # a failed merge is a failed check, don't try to read a missing output
    if merge_files("test1.root", "test2.root") != 0:
        return False
    meta = get_metadata()
    # expect the sorted union of all parents without duplicates
    should_be = sorted(set(parents[0] + parents[1]))
    is_actual = [meta.getParent(i) for i in range(meta.getNParents())]
    return should_be == is_actual
350
351
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    evtNr = [10, 1243, 232, 1272, 25]
    # use a different value for the full events so the sums can't be mixed up
    evtNrFullEvents = [i - 1 for i in evtNr]
    mcNr = [120, 821, 23, 923, 1]
    files = []
    for i, (e, f, m) in enumerate(zip(evtNr, evtNrFullEvents, mcNr)):
        meta = FileMetaData()
        meta.setNEvents(e)
        meta.setNFullEvents(f)
        meta.setMcEvents(m)
        meta.setRandomSeed(str(i))
        files.append(f"test{i}.root")
        create_testfile_direct(files[-1], meta)
    # a failed merge is a failed check, don't try to read a missing output
    if merge_files(*files) != 0:
        return False
    meta = get_metadata()
    return (
        sum(evtNr) == meta.getNEvents()
        and sum(evtNrFullEvents) == meta.getNFullEvents()
        and sum(mcNr) == meta.getMcEvents()
    )
373
374
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # (experiment, run, event) value used for the one file without events
    no_events = (-1, -1, 0)
    lowhigh = [
        no_events,
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    files = []
    for number, (exp, run, evt) in enumerate(lowhigh):
        event_count = 0 if (exp, run, evt) == no_events else 1
        meta = FileMetaData()
        meta.setNEvents(event_count)
        meta.setNFullEvents(event_count)
        meta.setRandomSeed(str(number))
        # every file covers exactly one event number: low and high coincide
        meta.setLow(exp, run, evt)
        meta.setHigh(exp, run, evt)
        filename = f"test{number}.root"
        files.append(filename)
        create_testfile_direct(filename, meta)

    # test all possible combinations taking 2 elements from the list plus the
    # full list in one go
    all_files = range(len(files))
    selections = list(itertools.permutations(all_files, 2))
    selections.append(all_files)
    for selection in selections:
        # the file without events must not contribute to the expected range
        with_events = [lowhigh[i] for i in selection if lowhigh[i] != no_events]
        low = min(with_events)
        high = max(with_events)
        inputs = [files[i] for i in selection]
        if merge_files("-f", "--no-catalog", *inputs) != 0:
            return False
        meta = get_metadata()
        merged_low = (meta.getExperimentLow(), meta.getRunLow(), meta.getEventLow())
        if merged_low != low:
            print("low event should be", low)
            meta.Print()
            return False
        merged_high = (
            meta.getExperimentHigh(),
            meta.getRunHigh(),
            meta.getEventHigh(),
        )
        if merged_high != high:
            print("high event should be", high)
            meta.Print()
            return False
    return True
423
424
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    create_testfile("test1.root", events=1111)
    create_testfile("test2.root", events=123)
    merge_files("test1.root", "test2.root", filter_modified=True)
    # 1111 + 123 events went in, so ask b2file-check for 1234 events and
    # mc events and for the two listed names
    check_command = ["b2file-check"]
    check_command += ["-n", "1234"]
    check_command += ["--mcevents", "1234"]
    check_command += ["output.root", "EventMetaData", "MCParticles"]
    exit_code = subprocess.call(check_command)
    return exit_code == 0
445
446
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    # NOTE: the docstring is printed by the test driver, so it is kept verbatim
    create_testfile(
        "test1.root", run=0, events=100, BELLE2_SEED="test1", BELLE2_USER="user1"
    )
    create_testfile(
        "test2.root", run=1, events=100, BELLE2_SEED="test2", BELLE2_USER="user2"
    )
    # a failed merge is a failed check, don't try to read a missing output
    if (
        merge_files("test1.root", "test2.root", "test1.root", filter_modified=True)
        != 0
    ):
        return False
    out = ROOT.TFile("output.root")
    events = out.Get("tree")
    entries = events.GetEntriesFast()
    if entries != 300:
        return False
    # we expect to see the events from run 0 twice and the ones from run 1 once.
    # So create a dictionary which contains the expected counts
    eventcount = {(0, 0, i + 1): 2 for i in range(100)}
    eventcount.update({(0, 1, i + 1): 1 for i in range(100)})
    for i in range(entries):
        events.GetEntry(i)
        e = events.EventMetaData
        key = (e.getExperiment(), e.getRun(), e.getEvent())
        # an event we never generated is a failure, not a KeyError crash
        if key not in eventcount:
            print("unexpected event in merged file:", key)
            return False
        eventcount[key] -= 1
    # every expected event has to be seen exactly as often as anticipated
    return all(remaining == 0 for remaining in eventcount.values())
470
471
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # second input is a copy of the fake_real.root file shipped with the tests
    real_file = basf2.find_file("framework/tests/fake_real.root")
    copyfile(real_file, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
477
478
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information is appended as last globaltag in the second file
    create_testfile_direct("test1.root", global_tag="test_globaltag")
    create_testfile_direct(
        "test2.root", global_tag="test_globaltag,Legacy_IP_Information"
    )
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the merged file should not mention Legacy_IP_Information anymore
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag"
489
490
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # here Legacy_IP_Information sits between two other globaltags
    create_testfile_direct("test1.root", global_tag="test_globaltag,other")
    create_testfile_direct(
        "test2.root", global_tag="test_globaltag,Legacy_IP_Information,other"
    )
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the merged file should only list the two remaining globaltags
    return (
        merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag,other"
    )
501
502
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # here Legacy_IP_Information is the only globaltag of the second file and
    # the first file has none at all
    create_testfile_direct("test1.root", global_tag="")
    create_testfile_direct("test2.root", global_tag="Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the merged file should end up with an empty globaltag
    return merged_ok and get_metadata().getDatabaseGlobalTag() == ""
511
512
def check_26_ntuple_merge():
    """Check that we can merge two ntuple output files"""
    # two event files of the same experiment and run ...
    create_testfile("test1.root", exp=1, run=2, events=111)
    create_testfile("test2.root", exp=1, run=2, events=123)
    # ... each converted to an ntuple file with the default tree names
    create_testfile_ntuple(input="test1.root", output="ntuple1.root")
    create_testfile_ntuple(input="test2.root", output="ntuple2.root")
    exit_code = merge_files("ntuple1.root", "ntuple2.root")
    return exit_code == 0
520
521
def check_27_ntuple_trees():
    """Check that ntuple merge failes if the tree names are different"""
    # NOTE: the docstring is printed by the test driver, so it is kept verbatim
    create_testfile("test1.root")
    create_testfile("test2.root")
    # first ntuple uses the default tree names, the second one other names
    create_testfile_ntuple(input="test1.root", output="ntuple1.root")
    other_names = ["differentTree", "tree"]
    create_testfile_ntuple(
        input="test2.root", output="ntuple2.root", treeNames=other_names
    )
    exit_code = merge_files("ntuple1.root", "ntuple2.root")
    return exit_code != 0
531
532
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't affect b2file-merge and adapt the correct version number here."""
    # class version of FileMetaData this test was written against
    expected_version = 11
    current_version = FileMetaData.Class().GetClassVersion()
    return current_version == expected_version
537
538
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    # collect everything called check_* from this module, ordered by name
    module_items = sorted(globals().items())
    checks = [(key, obj) for key, obj in module_items if key.startswith("check_")]

    failures = 0
    for name, fcn in checks:
        print(f"running {name}: {fcn.__doc__}")
        # run every check inside of clean_working_directory()
        with clean_working_directory():
            if fcn():
                print(f"{name} passed")
            else:
                print(f"{name} failed")
                failures += 1

    # the number of failed checks becomes the exit code
    sys.exit(failures)
553