Belle II Software  release-08-01-10
merge_files.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import os
12 import sys
13 import re
14 import subprocess
15 import itertools
16 from shutil import copyfile
17 import ROOT
18 from ROOT.Belle2 import FileMetaData
19 # we don't really need basf2 but it fixes the print buffering problem
20 import basf2
21 from b2test_utils import clean_working_directory, skip_test_if_light
22 
23 
def create_testfile(name, release=None, exp=0, run=0, events=100, branchNames=None, **argk):
    """Produce a test file by running basf2 on the module-level steering string.

    Parameters:
      name: name of the output file, also used to name the steering file
      release: accepted but not used by this function
      exp: experiment number passed to basf2 via ``--experiment``
      run: run number passed to basf2 via ``--run``
      events: number of events passed to basf2 via ``-n``
      branchNames: extra command line arguments appended after the steering
        file; the steering script forwards them to RootOutput as branch names
      argk: additional environment variables for the basf2 subprocess

    The exit code of basf2 is not checked.
    """
    branches = [] if branchNames is None else branchNames
    # environment of this process, extended/overridden by the keyword arguments
    environment = {**os.environ, **argk}

    script_name = f"steering-{name}.py"
    with open(script_name, "w") as script:
        script.write(testfile_steering)

    command = [
        "basf2", "-o", name,
        "--experiment", str(exp),
        "--run", str(run),
        "-n", str(events),
        script_name,
    ]
    subprocess.call(command + branches, env=environment)
38 
39 
def create_testfile_direct(name, metadata=None, release="test_release", user="test_user", seed=None,
                           site="test_site", global_tag="test_globaltag", steering="test_steering"):
    """Write a minimal ROOT file by hand, without running basf2.

    This gives full control over the FileMetaData stored in the file.

    Parameters:
      name: file name, also stored as the LFN
      metadata: FileMetaData object to fill and store; created if None
      release: release name stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store; if None and no metadata object was passed
        in, ``name + "-seed"`` is used
      site: site name stored in the creation data
      global_tag: database global tag to store
      steering: steering file content to store
    """
    created_here = metadata is None
    if created_here:
        metadata = FileMetaData()
    # Only invent a seed for metadata created here. A caller supplied object
    # keeps whatever seed it already carries unless a seed is given explicitly.
    if created_here and seed is None:
        seed = name + "-seed"

    if seed is not None:
        metadata.setRandomSeed(seed)
    metadata.setLfn(name)
    metadata.setCreationData("the most auspicious of days for testing", site, user, release)
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    root_file = ROOT.TFile(name, "RECREATE")
    # persistent tree: one entry holding the file metadata
    tree = ROOT.TTree("persistent", "persistent")
    tree.Branch("FileMetaData", metadata)
    tree.Fill()
    tree.Write()
    # event tree: one entry holding a default constructed EventMetaData
    tree = ROOT.TTree("tree", "tree")
    event_metadata = ROOT.Belle2.EventMetaData()
    tree.Branch("EventMetaData", event_metadata)
    tree.Fill()
    tree.Write()
    root_file.Close()
66 
67 
def get_metadata(name="output.root"):
    """Return a copy of the FileMetaData stored in a ROOT file.

    Parameters:
      name: name of the ROOT file to read

    Returns:
      A new FileMetaData object copied from the first entry of the
      "persistent" tree.

    The file is closed before returning. Callers in this script overwrite
    and re-read the same output file repeatedly, so the handle should not
    stay open until garbage collection.
    """
    root_file = ROOT.TFile(name)
    try:
        persistent = root_file.Get("persistent")
        persistent.GetEntry(0)
        # copy the object so the result does not depend on the tree or file
        return FileMetaData(persistent.FileMetaData)
    finally:
        root_file.Close()
74 
75 
def merge_files(*args, output="output.root", filter_modified=False):
    """Run b2file-merge on the given arguments and echo its standard output.

    Parameters:
      args: arguments placed after the output file name (input files and
        options such as "-f")
      output: name of the output file
      filter_modified: if True remove the warnings saying that a file was
        created with modified software and compatibility cannot be verified

    Returns:
      The exit code of b2file-merge.
    """
    command = ["b2file-merge", "-q", output, *args]
    result = subprocess.run(command, stdout=subprocess.PIPE)

    if filter_modified:
        # strip the "modified release" warning lines from the captured output
        result.stdout = re.sub(
            rb"^\[WARNING\] File \"(.*?)\" created with modified software ([a-zA-Z0-9\-+]*?): "
            rb"cannot verify that files are compatible\n",
            b"",
            result.stdout,
            flags=re.MULTILINE,
        )

    # always forward the (possibly filtered) output
    stdout_bytes = sys.stdout.buffer
    stdout_bytes.write(result.stdout)
    stdout_bytes.flush()
    return result.returncode
96 
97 
98 
# Content of the steering file written by create_testfile(). The script takes
# an optional globaltag override from BELLE2_GLOBALTAG and an optional random
# seed from BELLE2_SEED, and passes its command line arguments to RootOutput
# as branch names.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
    basf2.conditions.override_globaltags([os.environ["BELLE2_GLOBALTAG"]])
if "BELLE2_SEED" in os.environ:
    basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
114 
115 
def check_01_existing():
    """Check that merging a non exsiting file fails"""
    # the docstring is printed by the runner, so its text is kept verbatim
    create_testfile_direct("test2.root")
    # "/test1.root" is never created, so the merge must fail ...
    if merge_files("/test1.root") == 0:
        return False
    # ... while the file that does exist must merge fine
    return merge_files("test2.root") == 0
120 
121 
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # a plain text file that only has a .root extension
    with open("test1.root", "w") as fake_root:
        fake_root.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
127 
128 
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # put something in place of the default output file
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    # without -f the existing output must not be overwritten
    if merge_files("test1.root") == 0:
        return False
    # with -f the merge has to succeed
    return merge_files("-f", "test1.root") == 0
135 
136 
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # output path points into a directory that is expected not to exist
    exit_code = merge_files("test1.root", output="/nosuchdir/foo")
    return exit_code != 0
141 
142 
def check_05_release():
    """Check that it fails if the releases are different"""
    # first file uses the default release, second one a different release
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="other_release")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
148 
149 
def check_06_empty_release():
    """Check that merging fails with empty release valuses"""
    # second file carries an empty release string
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    return not merged_ok
155 
156 
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    inputs = {"test1.root": "test_release", "test2.root": "test_release-modified"}
    for filename, release in inputs.items():
        create_testfile_direct(filename, release=release)
    # only the exit code is verified here, the warning itself is not inspected
    return merge_files(*inputs) == 0
162 
163 
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    filenames = ("test1.root", "test2.root")
    # both files get the very same random seed
    for filename in filenames:
        create_testfile_direct(filename, seed="seed1")
    # only the exit code is verified here, the warning itself is not inspected
    return merge_files(*filenames) == 0
169 
170 
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    # default steering content versus a different one
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", steering="my other steering")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
176 
177 
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    # default globaltag versus a different one
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", global_tag="other_globaltag")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    return not merged_ok
183 
184 
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # first file: no branch selection, second file: only EventMetaData
    create_testfile("test1.root")
    create_testfile("test2.root", branchNames=["EventMetaData"])
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
190 
191 
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # combine the file with itself using plain hadd
    hadd_command = ["hadd", "test11.root", "test1.root", "test1.root"]
    subprocess.call(hadd_command)
    exit_code = merge_files("test11.root")
    return exit_code != 0
197 
198 
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # file containing nothing but an empty tree called "tree"
    root_file = ROOT.TFile("test1.root", "RECREATE")
    event_tree = ROOT.TTree("tree", "tree")
    event_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
206 
207 
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # file with a persistent tree holding default metadata and nothing else
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent_tree = ROOT.TTree("persistent", "persistent")
    file_metadata = FileMetaData()
    persistent_tree.Branch("FileMetaData", file_metadata)
    persistent_tree.Fill()
    persistent_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
218 
219 
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # NOTE: unlike the docstring suggests, the event tree does exist in this
    # file; it just has no branches, which is the situation being tested.
    root_file = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_metadata = FileMetaData()
    file_metadata.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_metadata)
    tree.Fill()
    tree.Write()
    # event tree without any branch and without entries
    tree = ROOT.TTree("tree", "tree")
    tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
233 
234 
def check_16_nonmergeable():
    """Check that merging fails it there a ron mergeable persistent trees"""
    root_file = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_metadata = FileMetaData()
    file_metadata.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_metadata)
    # additional persistent branch, filled with the same metadata object
    tree.Branch("AnotherMetaData", file_metadata)
    tree.Fill()
    tree.Write()
    # the event tree branch reuses the FileMetaData object as its content
    tree = ROOT.TTree("tree", "tree")
    tree.Branch("EventMetaData", file_metadata)
    tree.Fill()
    tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
251 
252 
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    parents = [("a", "b", "c"), ("a", "c", "d")]
    # one input file per parent list: test1.root with seed "1", test2.root with seed "2"
    for number, lfns in enumerate(parents, start=1):
        file_metadata = FileMetaData()
        parent_vector = ROOT.std.vector("std::string")()
        for lfn in lfns:
            parent_vector.push_back(lfn)
        file_metadata.setParents(parent_vector)
        file_metadata.setRandomSeed(str(number))
        create_testfile_direct(f"test{number}.root", file_metadata)

    merge_files("test1.root", "test2.root")
    merged = get_metadata()
    # expectation: sorted list of all distinct parents of both inputs
    expected = sorted(set(parents[0]) | set(parents[1]))
    actual = [merged.getParent(index) for index in range(merged.getNParents())]
    return expected == actual
275 
276 
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    event_counts = [10, 1243, 232, 1272, 25]
    # number of full events is chosen as one less than the number of events
    full_event_counts = [count - 1 for count in event_counts]
    mc_counts = [120, 821, 23, 923, 1]

    filenames = []
    for index, counts in enumerate(zip(event_counts, full_event_counts, mc_counts)):
        n_events, n_full_events, n_mc = counts
        file_metadata = FileMetaData()
        file_metadata.setNEvents(n_events)
        file_metadata.setNFullEvents(n_full_events)
        file_metadata.setMcEvents(n_mc)
        file_metadata.setRandomSeed(str(index))
        filename = f"test{index}.root"
        filenames.append(filename)
        create_testfile_direct(filename, file_metadata)

    merge_files(*filenames)
    merged = get_metadata()
    if sum(event_counts) != merged.getNEvents():
        return False
    if sum(full_event_counts) != merged.getNFullEvents():
        return False
    return sum(mc_counts) == merged.getMcEvents()
294 
295 
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # (experiment, run, event) used as both low and high of each input file;
    # the first entry marks a file without events
    no_events = (-1, -1, 0)
    lowhigh = [
        no_events,
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    filenames = []
    for index, (experiment, run, event) in enumerate(lowhigh):
        n_events = 0 if (experiment, run, event) == no_events else 1
        file_metadata = FileMetaData()
        file_metadata.setNEvents(n_events)
        file_metadata.setNFullEvents(n_events)
        file_metadata.setRandomSeed(str(index))
        file_metadata.setLow(experiment, run, event)
        file_metadata.setHigh(experiment, run, event)
        filenames.append("test%d.root" % index)
        create_testfile_direct(filenames[-1], file_metadata)

    # merge every ordered pair of files and finally all files in one go
    everything = range(len(filenames))
    selections = list(itertools.permutations(everything, 2))
    selections.append(everything)
    for selection in selections:
        # files without events do not contribute to the expected range
        filled = [lowhigh[i] for i in selection if lowhigh[i] != no_events]
        low = min(filled)
        high = max(filled)
        if merge_files("-f", "--no-catalog", *(filenames[i] for i in selection)) != 0:
            return False
        merged = get_metadata()
        merged_low = (merged.getExperimentLow(), merged.getRunLow(), merged.getEventLow())
        if merged_low != low:
            print("low event should be", low)
            merged.Print()
            return False
        merged_high = (merged.getExperimentHigh(), merged.getRunHigh(), merged.getEventHigh())
        if merged_high != high:
            print("high event should be", high)
            merged.Print()
            return False
    return True
336 
337 
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    # 1111 + 123 events, so the merged file is checked against 1234
    for filename, n_events in (("test1.root", 1111), ("test2.root", 123)):
        create_testfile(filename, events=n_events)
    merge_files("test1.root", "test2.root", filter_modified=True)
    check_command = [
        "b2file-check",
        "-n", "1234",
        "--mcevents", "1234",
        "output.root", "EventMetaData", "MCParticles",
    ]
    exit_code = subprocess.call(check_command)
    return exit_code == 0
345 
346 
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    create_testfile("test1.root", run=0, events=100, BELLE2_SEED="test1", BELLE2_USER="user1")
    create_testfile("test2.root", run=1, events=100, BELLE2_SEED="test2", BELLE2_USER="user2")
    # test1.root is deliberately passed twice
    merge_files("test1.root", "test2.root", "test1.root", filter_modified=True)

    merged_file = ROOT.TFile("output.root")
    event_tree = merged_file.Get("tree")
    n_entries = event_tree.GetEntriesFast()
    if n_entries != 300:
        return False

    # Expected number of occurrences per (experiment, run, event): events of
    # run 0 show up twice, events of run 1 once. Each entry read from the tree
    # decrements its counter.
    remaining = {
        (0, run, event): copies
        for run, copies in ((0, 2), (1, 1))
        for event in range(1, 101)
    }
    for entry in range(n_entries):
        event_tree.GetEntry(entry)
        event_metadata = event_tree.EventMetaData
        key = (event_metadata.getExperiment(), event_metadata.getRun(), event_metadata.getEvent())
        remaining[key] -= 1
    # everything has to be seen exactly as often as expected
    return all(count == 0 for count in remaining.values())
366 
367 
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # second input is a copy of the fake_real.root file shipped with the framework tests
    fake_real = basf2.find_file("framework/tests/fake_real.root")
    copyfile(fake_real, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
373 
374 
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information appended at the end of the second file's tags
    create_testfile_direct("test1.root", global_tag="test_globaltag")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the merged globaltag is expected without the legacy entry
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag"
383 
384 
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information sits between two other tags in the second file
    create_testfile_direct("test1.root", global_tag="test_globaltag,other")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information,other")
    exit_code = merge_files("test1.root", "test2.root")
    if exit_code:
        return False
    merged_tag = get_metadata().getDatabaseGlobalTag()
    return merged_tag == "test_globaltag,other"
393 
394 
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information is the only tag of the second file, the first has none
    create_testfile_direct("test1.root", global_tag="")
    create_testfile_direct("test2.root", global_tag="Legacy_IP_Information")
    merge_failed = merge_files("test1.root", "test2.root") != 0
    if merge_failed:
        return False
    merged = get_metadata()
    return merged.getDatabaseGlobalTag() == ""
403 
404 
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't
    affect b2file-merge and adapt the correct version number here."""
    # class version of FileMetaData this test was last checked against
    expected_version = 11
    current_version = FileMetaData.Class().GetClassVersion()
    return current_version == expected_version
410 
411 
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    # collect all module level names starting with "check_", ordered by name
    checks = sorted(item for item in globals().items() if item[0].startswith("check_"))
    failed_count = 0
    for check_name, check in checks:
        print(f"running {check_name}: {check.__doc__}")
        # every check runs inside its own clean working directory
        with clean_working_directory():
            if check():
                print(f"{check_name} passed")
            else:
                print(f"{check_name} failed")
                failed_count += 1

    # the number of failed checks becomes the exit code
    sys.exit(failed_count)