Belle II Software development
merge_files.py
1#!/usr/bin/env python3
2
3
10
11import os
12import sys
13import re
14import subprocess
15import itertools
16from shutil import copyfile
17import ROOT
18from ROOT.Belle2 import FileMetaData
# importing basf2 fixes the print buffering problem; basf2.find_file is also used by check_22_real_mc
20import basf2
21from b2test_utils import clean_working_directory, skip_test_if_light
22
23
def create_testfile(name, release=None, exp=0, run=0, events=100, branchNames=None, **argk):
    """Produce a test file by running basf2 on the module level steering string.

    Parameters:
      name: name of the output file, also used to build the steering file name
      release: not used by this function
      exp: experiment number handed to basf2 via ``--experiment``
      run: run number handed to basf2 via ``--run``
      events: number of events handed to basf2 via ``-n``
      branchNames: list of extra command line arguments placed after the
        steering file; the steering script passes them to RootOutput as
        branch names
      argk: additional environment variables for the basf2 subprocess
    """
    extra_arguments = [] if branchNames is None else branchNames
    # the subprocess sees a copy of our environment extended by the keyword arguments
    environment = {**os.environ, **argk}

    steering_file = f"steering-{name}.py"
    with open(steering_file, "w") as steering:
        steering.write(testfile_steering)

    command = [
        "basf2", "-o", name,
        "--experiment", str(exp),
        "--run", str(run),
        "-n", str(events),
        steering_file,
    ]
    # the exit code of basf2 is deliberately not evaluated here
    subprocess.call(command + extra_arguments, env=environment)
38
39
def create_testfile_direct(name, metadata=None, release="test_release", user="test_user", seed=None,
                           site="test_site", global_tag="test_globaltag", steering="test_steering"):
    """Write a minimal test file by hand, without running basf2, to have full
    control over the FileMetaData.

    The file contains a ``persistent`` tree with one FileMetaData entry and a
    ``tree`` tree with one default constructed EventMetaData entry.

    Parameters:
      name: file name, also stored as the LFN
      metadata: FileMetaData object to store; a fresh one is created if None
      release: release string stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store; if given it replaces the seed in ``metadata``
      site: site name stored in the creation data
      global_tag: value stored as database global tag
      steering: value stored as steering file content
    """
    if metadata is None:
        metadata = FileMetaData()
        # NOTE(review): the default seed is only applied to metadata created
        # here so that a seed already set on a caller supplied object is kept;
        # this nesting is reconstructed, confirm against the upstream file
        if seed is None:
            seed = name + "-seed"

    if seed is not None:
        metadata.setRandomSeed(seed)
    metadata.setLfn(name)
    metadata.setCreationData("the most auspicious of days for testing", site, user, release)
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    root_file = ROOT.TFile(name, "RECREATE")
    try:
        # one entry with the file level metadata
        persistent = ROOT.TTree("persistent", "persistent")
        persistent.Branch("FileMetaData", metadata)
        persistent.Fill()
        persistent.Write()
        # one entry in the event tree
        events = ROOT.TTree("tree", "tree")
        event_meta = ROOT.Belle2.EventMetaData()
        events.Branch("EventMetaData", event_meta)
        events.Fill()
        events.Write()
    finally:
        # close the file even if filling or writing raised
        root_file.Close()
66
67
def get_metadata(name="output.root"):
    """Get the metadata out of a root file

    Parameters:
      name: name of the file to read

    Returns:
      a copy of the FileMetaData stored in the first entry of the
      ``persistent`` tree
    """
    out = ROOT.TFile(name)
    try:
        persistent = out.Get("persistent")
        persistent.GetEntry(0)
        # the copy is created before the ``finally`` clause closes the file
        return FileMetaData(persistent.FileMetaData)
    finally:
        # do not keep the file open: callers read the same file name again
        # after it has been overwritten by another merge
        out.Close()
74
75
def merge_files(*args, output="output.root", filter_modified=False):
    """Call ``b2file-merge -q`` on the given arguments and echo its output

    Parameters:
      args: command line arguments placed after the output file name, i.e.
        the input files and optional flags like ``-f``
      output: name of the output file
      filter_modified: if True omit warnings that the release is modified and
        consistency cannot be checked

    Returns:
      the exit code of b2file-merge
    """
    command = ["b2file-merge", "-q", output, *args]
    result = subprocess.run(command, stdout=subprocess.PIPE)
    captured = result.stdout
    if filter_modified:
        # drop every line carrying the "modified software" warning
        modified_warning = re.compile(
            rb"^\[WARNING\] File \"(.*?)\" created with modified software ([a-zA-Z0-9\-+]*?): "
            rb"cannot verify that files are compatible\n", re.MULTILINE)
        captured = modified_warning.sub(b"", captured)

    # forward the (possibly filtered) bytes to our own stdout
    raw_stdout = sys.stdout.buffer
    raw_stdout.write(captured)
    raw_stdout.flush()
    return result.returncode
96
97
98
# Steering script written to disk by create_testfile and executed with basf2.
# The global tag and the random seed can be set via the BELLE2_GLOBALTAG and
# BELLE2_SEED environment variables; command line arguments following the
# steering file are used as branch names for RootOutput.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
    basf2.conditions.override_globaltags([os.environ["BELLE2_GLOBALTAG"]])
if "BELLE2_SEED" in os.environ:
    basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
114
115
def check_01_existing():
    """Check that merging a non exsiting file fails"""
    # NOTE: the docstring is printed by the runner at the end of the file
    create_testfile_direct("test2.root")
    # "/test1.root" is never created, so merging it has to fail ...
    if merge_files("/test1.root") == 0:
        return False
    # ... while the file created above has to merge fine
    return merge_files("test2.root") == 0
120
121
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # a plain text file that merely carries the .root extension
    with open("test1.root", "w") as fake_root:
        fake_root.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
127
128
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # make sure the default output file already exists
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    # without -f the merge has to refuse, the forced merge is only tried afterwards
    if merge_files("test1.root") == 0:
        return False
    return merge_files("-f", "test1.root") == 0
135
136
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # the output is placed in a directory that is expected not to exist
    exit_code = merge_files("test1.root", output="/nosuchdir/foo")
    return exit_code != 0
141
142
def check_05_release():
    """Check that it fails if the releases are different"""
    # the first file keeps the default release of create_testfile_direct
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="other_release")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
148
149
def check_06_empty_release():
    """Check that merging fails with empty release valuses"""
    create_testfile_direct("test1.root")
    # second file carries an empty release string
    create_testfile_direct("test2.root", release="")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
155
156
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    # same release name, once plain and once with the "-modified" suffix
    for filename, release in (("test1.root", "test_release"), ("test2.root", "test_release-modified")):
        create_testfile_direct(filename, release=release)
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
162
163
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    # both input files are created with the very same seed
    for filename in ("test1.root", "test2.root"):
        create_testfile_direct(filename, seed="seed1")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
169
170
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    # first file keeps the default steering content
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", steering="my other steering")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
176
177
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    # first file keeps the default global tag
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", global_tag="other_globaltag")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
183
184
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # no branch selection for the first file, only EventMetaData for the second
    create_testfile("test1.root")
    create_testfile("test2.root", branchNames=["EventMetaData"])
    # the modified release warning is filtered from the printed output
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
190
191
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # adding the file to itself with hadd gives two entries in the persistent tree
    subprocess.call(["hadd", "test11.root", "test1.root", "test1.root"])
    # Without the hadd output the merge below would fail for the wrong reason
    # and the check would pass without testing anything.
    if not os.path.exists("test11.root"):
        print("hadd did not produce test11.root")
        return False
    return merge_files("test11.root") != 0
197
198
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # write a file that only contains an (empty) event tree
    root_file = ROOT.TFile("test1.root", "RECREATE")
    events = ROOT.TTree("tree", "tree")
    events.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
206
207
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # write a file that has the persistent tree with metadata but no "tree"
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    persistent.Branch("FileMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
218
219
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # NOTE: in contrast to the docstring the event tree does exist here, but it
    # is written without any branches
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    persistent.Branch("FileMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    # event tree without a single branch
    events = ROOT.TTree("tree", "tree")
    events.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
233
234
def check_16_nonmergeable():
    """Check that merging fails it there a ron mergeable persistent trees"""
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    persistent.Branch("FileMetaData", file_meta)
    # a second branch in the persistent tree besides the FileMetaData
    persistent.Branch("AnotherMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    # the event tree reuses the same object as content of its only branch
    events = ROOT.TTree("tree", "tree")
    events.Branch("EventMetaData", file_meta)
    events.Fill()
    events.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
251
252
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    parents = [("a", "b", "c"), ("a", "c", "d")]
    m1 = FileMetaData()
    m2 = FileMetaData()
    lfn1 = ROOT.std.vector("std::string")()
    lfn2 = ROOT.std.vector("std::string")()
    # fill one string vector per file with the respective parent names
    for lfns, names in ((lfn1, parents[0]), (lfn2, parents[1])):
        for parent in names:
            lfns.push_back(parent)
    m1.setParents(lfn1)
    m2.setParents(lfn2)
    m1.setRandomSeed("1")
    m2.setRandomSeed("2")
    create_testfile_direct("test1.root", m1)
    create_testfile_direct("test2.root", m2)
    # the exit code is not looked at, only the resulting metadata
    merge_files("test1.root", "test2.root")
    merged = get_metadata()
    # expected is the sorted union of both parent lists
    should_be = sorted(set(parents[0] + parents[1]))
    is_actual = []
    for index in range(merged.getNParents()):
        is_actual.append(merged.getParent(index))
    return should_be == is_actual
275
276
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    evtNr = [10, 1243, 232, 1272, 25]
    # one full event less than events in each file
    evtNrFullEvents = [n - 1 for n in evtNr]
    mcNr = [120, 821, 23, 923, 1]
    files = []
    for index, (n_events, n_full, n_mc) in enumerate(zip(evtNr, evtNrFullEvents, mcNr)):
        filename = f"test{index}.root"
        file_meta = FileMetaData()
        file_meta.setNEvents(n_events)
        file_meta.setNFullEvents(n_full)
        file_meta.setMcEvents(n_mc)
        file_meta.setRandomSeed(str(index))
        files.append(filename)
        create_testfile_direct(filename, file_meta)
    # the exit code is not looked at, only the resulting metadata
    merge_files(*files)
    merged = get_metadata()
    if sum(evtNr) != merged.getNEvents():
        return False
    if sum(evtNrFullEvents) != merged.getNFullEvents():
        return False
    return sum(mcNr) == merged.getMcEvents()
294
295
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # (experiment, run, event) triples used as low and high of one file each;
    # the first one marks a file without events and is excluded from the
    # expected low/high values
    unset = (-1, -1, 0)
    lowhigh = [
        unset,
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    files = []
    for number, entry in enumerate(lowhigh):
        n_events = 0 if entry == unset else 1
        file_meta = FileMetaData()
        file_meta.setNEvents(n_events)
        file_meta.setNFullEvents(n_events)
        file_meta.setRandomSeed(str(number))
        file_meta.setLow(*entry)
        file_meta.setHigh(*entry)
        filename = f"test{number}.root"
        files.append(filename)
        create_testfile_direct(filename, file_meta)

    # test all ordered pairs of files plus the full list in one go
    all_files = range(len(files))
    selections = list(itertools.permutations(all_files, 2))
    selections.append(all_files)
    for selection in selections:
        populated = [lowhigh[i] for i in selection if lowhigh[i] != unset]
        low = min(populated)
        high = max(populated)
        inputs = [files[i] for i in selection]
        if merge_files("-f", "--no-catalog", *inputs) != 0:
            return False
        merged = get_metadata()
        actual_low = (merged.getExperimentLow(), merged.getRunLow(), merged.getEventLow())
        if actual_low != low:
            print("low event should be", low)
            merged.Print()
            return False
        actual_high = (merged.getExperimentHigh(), merged.getRunHigh(), merged.getEventHigh())
        if actual_high != high:
            print("high event should be", high)
            merged.Print()
            return False
    return True
336
337
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    # 1111 + 123 events give the 1234 events asked for below
    for filename, n_events in (("test1.root", 1111), ("test2.root", 123)):
        create_testfile(filename, events=n_events)
    merge_files("test1.root", "test2.root", filter_modified=True)
    command = [
        "b2file-check", "-n", "1234", "--mcevents", "1234",
        "output.root", "EventMetaData", "MCParticles",
    ]
    exit_code = subprocess.call(command)
    return exit_code == 0
345
346
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    create_testfile("test1.root", run=0, events=100, BELLE2_SEED="test1", BELLE2_USER="user1")
    create_testfile("test2.root", run=1, events=100, BELLE2_SEED="test2", BELLE2_USER="user2")
    # the first file is passed twice on purpose
    merge_files("test1.root", "test2.root", "test1.root", filter_modified=True)
    out = ROOT.TFile("output.root")
    events = out.Get("tree")
    entries = events.GetEntriesFast()
    if entries != 300:
        return False
    # Bookkeeping of how often each (experiment, run, event) still has to show
    # up: events of run 0 are expected twice, events of run 1 once.
    remaining = {}
    for event in range(1, 101):
        remaining[(0, 0, event)] = 2
    for event in range(1, 101):
        remaining[(0, 1, event)] = 1
    for entry in range(entries):
        events.GetEntry(entry)
        event_meta = events.EventMetaData
        key = (event_meta.getExperiment(), event_meta.getRun(), event_meta.getEvent())
        remaining[key] -= 1
    # every expected event has to be seen exactly as often as expected
    return all(count == 0 for count in remaining.values())
366
367
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # the second input is a copy of a file shipped with the framework tests
    fake_real = basf2.find_file("framework/tests/fake_real.root")
    copyfile(fake_real, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
373
374
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information appended at the end of the second global tag list
    create_testfile_direct("test1.root", global_tag="test_globaltag")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag"
383
384
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information in the middle of the second global tag list
    create_testfile_direct("test1.root", global_tag="test_globaltag,other")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information,other")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag,other"
393
394
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information is the only global tag of the second file
    create_testfile_direct("test1.root", global_tag="")
    create_testfile_direct("test2.root", global_tag="Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == ""
403
404
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't affect b2file-merge and adapt the correct version number here."""
    # class version this test was last checked against
    expected_version = 11
    return FileMetaData.Class().GetClassVersion() == expected_version
409
410
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    failures = 0
    # all module level names starting with "check_", ordered by name
    existing = [(name, fcn) for name, fcn in sorted(globals().items()) if name.startswith("check_")]
    for name, fcn in existing:
        # the docstring of each check is part of the printed output
        print(f"running {name}: {fcn.__doc__}")
        # every check runs inside its own clean_working_directory() context
        with clean_working_directory():
            if fcn():
                print(f"{name} passed")
            else:
                print(f"{name} failed")
                failures += 1

    # the number of failed checks is the exit code
    sys.exit(failures)
425