Belle II Software  release-06-01-15
merge_files.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import os
12 import sys
13 import re
14 import subprocess
15 import itertools
16 from shutil import copyfile
17 import ROOT
18 from ROOT.Belle2 import FileMetaData
19 # we don't really need basf2 but it fixes the print buffering problem
20 import basf2
21 from b2test_utils import clean_working_directory, skip_test_if_light
22 
23 
def create_testfile(name, release=None, exp=0, run=0, events=100, branchNames=None, **argk):
    """Produce a test file by running basf2 on the module level steering string.

    The steering text is written to ``steering-<name>.py`` in the current
    directory and executed with the ``basf2`` executable.

    Parameters:
      name: name of the output file, passed to basf2 via ``-o``
      release: accepted for call compatibility, not used by this function
      exp: experiment number passed via ``--experiment``
      run: run number passed via ``--run``
      events: number of events passed via ``-n``
      branchNames: optional list of extra command line arguments appended
          after the steering file (the steering forwards them to RootOutput)
      argk: additional environment variables for the basf2 process
    """
    # environment of the child: the current one plus everything given by keyword
    child_env = {**os.environ, **argk}

    steering_file = f"steering-{name}.py"
    with open(steering_file, "w") as steering:
        steering.write(testfile_steering)

    command = ["basf2", "-o", name, "--experiment", str(exp), "--run", str(run),
               "-n", str(events), steering_file]
    extra_arguments = [] if branchNames is None else branchNames
    subprocess.call(command + extra_arguments, env=child_env)
38 
39 
def create_testfile_direct(name, metadata=None, release="test_release", user="test_user", seed=None,
                           site="test_site", global_tag="test_globaltag", steering="test_steering"):
    """Write a minimal test file by hand, without running basf2.

    This gives full control over the FileMetaData stored in the file.

    Parameters:
      name: file name, also used as LFN
      metadata: FileMetaData instance to fill and store, a new one is
          created if None
      release: release name stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store, ``name + "-seed"`` is used if None
      site: site name stored in the creation data
      global_tag: database global tag to store
      steering: steering content to store
    """
    if metadata is None:
        metadata = FileMetaData()

    # a seed is always written: either the given one or one derived from the file name
    metadata.setRandomSeed((name + "-seed") if seed is None else seed)
    metadata.setLfn(name)
    metadata.setCreationData("the most auspicious of days for testing", site, user, release)
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    rootfile = ROOT.TFile(name, "RECREATE")
    # persistent tree with one entry holding the file metadata
    tree = ROOT.TTree("persistent", "persistent")
    tree.Branch("FileMetaData", metadata)
    tree.Fill()
    tree.Write()
    # event tree with one entry holding a default constructed EventMetaData
    tree = ROOT.TTree("tree", "tree")
    event_meta = ROOT.Belle2.EventMetaData()
    tree.Branch("EventMetaData", event_meta)
    tree.Fill()
    tree.Write()
    rootfile.Close()
66 
67 
def get_metadata(name="output.root"):
    """Read the FileMetaData stored in a root file.

    Parameters:
      name: name of the file to open

    Returns:
      a new FileMetaData object constructed from the first entry of the
      ``persistent`` tree
    """
    rootfile = ROOT.TFile(name)
    persistent = rootfile.Get("persistent")
    persistent.GetEntry(0)
    return FileMetaData(persistent.FileMetaData)
74 
75 
def merge_files(*args, output="output.root", filter_modified=False):
    """Call ``b2file-merge -q`` with the given arguments and echo its output.

    Parameters:
      args: arguments placed after the output file name (input files and
          any additional options)
      output: name of the output file
      filter_modified: if True omit warnings that the release is modified and
          consistency cannot be checked

    Returns:
      the exit code of the b2file-merge process
    """
    command = ["b2file-merge", "-q", output, *args]
    result = subprocess.run(command, stdout=subprocess.PIPE)
    captured = result.stdout

    if filter_modified:
        # drop every complete "modified software" warning line from the captured bytes
        modified_warning = re.compile(
            rb"^\[WARNING\] File \"(.*?)\" created with modified software ([a-zA-Z0-9\-+]*?): "
            rb"cannot verify that files are compatible\n", flags=re.MULTILINE)
        captured = modified_warning.sub(b"", captured)

    # whatever is left is forwarded unchanged to our own stdout
    sys.stdout.buffer.write(captured)
    sys.stdout.buffer.flush()
    return result.returncode
96 
97 
98 
# Steering file content which create_testfile() writes to disk and runs with
# basf2. It sets the log level to ERROR, overrides the globaltags and the
# random seed if BELLE2_GLOBALTAG / BELLE2_SEED are present in the environment
# and runs EventInfoSetter, ParticleGun and RootOutput, passing the command
# line arguments to RootOutput as branchNames.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
 basf2.conditions.override_globaltags([os.environ["BELLE2_GLOBALTAG"]])
if "BELLE2_SEED" in os.environ:
 basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
114 
115 
def check_01_existing():
    """Check that merging a non exsiting file fails"""
    create_testfile_direct("test2.root")
    # "/test1.root" is never created, so the merge tool has to report an error
    if merge_files("/test1.root") == 0:
        return False
    # the file which was actually written has to be accepted
    return merge_files("test2.root") == 0
120 
121 
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # plain text stored under a .root file name
    with open("test1.root", "w") as bogus:
        bogus.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
127 
128 
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # make sure the default output file is already present before merging
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    # without -f the merge has to fail ...
    if merge_files("test1.root") == 0:
        return False
    # ... and with -f it has to succeed
    return merge_files("-f", "test1.root") == 0
135 
136 
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # output file placed in a directory named to not exist
    exit_code = merge_files("test1.root", output="/nosuchdir/foo")
    return exit_code != 0
141 
142 
def check_05_release():
    """Check that it fails if the releases are different"""
    # first file uses the default release, the second one a different name
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="other_release")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
148 
149 
def check_06_empty_release():
    """Check that merging fails with empty release valuses"""
    # default release in the first file, empty release string in the second
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", release="")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
155 
156 
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    # same release name, once plain and once with "-modified" appended
    create_testfile_direct("test1.root", release="test_release")
    create_testfile_direct("test2.root", release="test_release-modified")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
162 
163 
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    # both files are written with the very same random seed
    for filename in ("test1.root", "test2.root"):
        create_testfile_direct(filename, seed="seed1")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
169 
170 
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    # default steering content in the first file, different content in the second
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", steering="my other steering")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
176 
177 
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    # default global tag in the first file, another one in the second
    create_testfile_direct("test1.root")
    create_testfile_direct("test2.root", global_tag="other_globaltag")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
183 
184 
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # first file without branch selection, second one restricted to EventMetaData
    create_testfile("test1.root")
    create_testfile("test2.root", branchNames=["EventMetaData"])
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
190 
191 
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # combine the file with itself using hadd to obtain test11.root
    hadd_command = ["hadd", "test11.root", "test1.root", "test1.root"]
    subprocess.call(hadd_command)
    exit_code = merge_files("test11.root")
    return exit_code != 0
197 
198 
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # the file only gets an (empty) event tree, nothing named "persistent"
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    event_tree = ROOT.TTree("tree", "tree")
    event_tree.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
206 
207 
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # the file only gets the persistent tree with default FileMetaData
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    persistent = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    persistent.Branch("FileMetaData", file_meta)
    persistent.Fill()
    persistent.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
218 
219 
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # NOTE: the docstring is printed by the test runner and reads like the one
    # of check_14; here an event tree is written, but it has no branches
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_meta)
    tree.Fill()
    tree.Write()
    # event tree without any branch and without any entry
    tree = ROOT.TTree("tree", "tree")
    tree.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
233 
234 
def check_16_nonmergeable():
    """Check that merging fails it there a ron mergeable persistent trees"""
    rootfile = ROOT.TFile("test1.root", "RECREATE")
    tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    tree.Branch("FileMetaData", file_meta)
    # second branch in the persistent tree, filled from the same object
    tree.Branch("AnotherMetaData", file_meta)
    tree.Fill()
    tree.Write()
    # event tree with one entry; the branch is filled from the same object as well
    tree = ROOT.TTree("tree", "tree")
    tree.Branch("EventMetaData", file_meta)
    tree.Fill()
    tree.Write()
    rootfile.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
251 
252 
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    # parents of the two input files, "a" and "c" are present in both
    parents = [("a", "b", "c"), ("a", "c", "d")]
    filenames = ["test1.root", "test2.root"]
    for number, (filename, lfns) in enumerate(zip(filenames, parents), start=1):
        meta = FileMetaData()
        parent_lfns = ROOT.std.vector("std::string")()
        for lfn in lfns:
            parent_lfns.push_back(lfn)
        meta.setParents(parent_lfns)
        meta.setRandomSeed(str(number))
        create_testfile_direct(filename, meta)

    # report a failed merge as a failed check instead of trying to read an
    # output file which might not exist (same handling as in check_19)
    if merge_files(*filenames) != 0:
        return False

    merged = get_metadata()
    # expectation: every parent exactly once, in sorted order
    should_be = sorted(set(parents[0] + parents[1]))
    is_actual = [merged.getParent(i) for i in range(merged.getNParents())]
    return should_be == is_actual
275 
276 
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    evtNr = [10, 1243, 232, 1272, 25]
    mcNr = [120, 821, 23, 923, 1]
    files = []
    # one input file per (number of events, number of mc events) pair
    for i, (e, m) in enumerate(zip(evtNr, mcNr)):
        meta = FileMetaData()
        meta.setNEvents(e)
        meta.setMcEvents(m)
        meta.setRandomSeed(str(i))
        filename = "test%d.root" % i
        files.append(filename)
        create_testfile_direct(filename, meta)

    # report a failed merge as a failed check instead of trying to read an
    # output file which might not exist (same handling as in check_19)
    if merge_files(*files) != 0:
        return False

    merged = get_metadata()
    return sum(evtNr) == merged.getNEvents() and sum(mcNr) == merged.getMcEvents()
292 
293 
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # value used for the one file which is written with zero events
    empty = (-1, -1, 0)
    # (experiment, run, event) used as both low and high of each input file
    lowhigh = [
        (-1, -1, 0),
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    files = []
    for number, (exp, run, evt) in enumerate(lowhigh):
        meta = FileMetaData()
        meta.setNEvents(0 if (exp, run, evt) == empty else 1)
        meta.setRandomSeed(str(number))
        meta.setLow(exp, run, evt)
        meta.setHigh(exp, run, evt)
        filename = "test%d.root" % number
        files.append(filename)
        create_testfile_direct(filename, meta)

    # test all possible combinations taking 2 elements from the list plus the
    # full list in one go
    everything = range(len(files))
    selections = list(itertools.permutations(everything, 2))
    selections.append(everything)
    for selection in selections:
        # the file without events does not contribute to the expectation
        filled = [lowhigh[i] for i in selection if lowhigh[i] != empty]
        low = min(filled)
        high = max(filled)
        inputs = [files[i] for i in selection]
        if merge_files("-f", "--no-catalog", *inputs) != 0:
            return False
        meta = get_metadata()
        merged_low = (meta.getExperimentLow(), meta.getRunLow(), meta.getEventLow())
        if merged_low != low:
            print("low event should be", low)
            meta.Print()
            return False
        merged_high = (meta.getExperimentHigh(), meta.getRunHigh(), meta.getEventHigh())
        if merged_high != high:
            print("high event should be", high)
            meta.Print()
            return False
    return True
333 
334 
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    # 1111 + 123 events, matching the 1234 requested from b2file-check below
    create_testfile("test1.root", events=1111)
    create_testfile("test2.root", events=123)
    merge_files("test1.root", "test2.root", filter_modified=True)
    check_command = ["b2file-check", "-n", "1234", "--mcevents", "1234",
                     "output.root", "EventMetaData", "MCParticles"]
    exit_code = subprocess.call(check_command)
    return exit_code == 0
342 
343 
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    create_testfile("test1.root", run=0, events=100, BELLE2_SEED="test1", BELLE2_USER="user1")
    create_testfile("test2.root", run=1, events=100, BELLE2_SEED="test2", BELLE2_USER="user2")
    # report a failed merge as a failed check instead of trying to read an
    # output file which might not exist (same handling as in check_19)
    if merge_files("test1.root", "test2.root", "test1.root", filter_modified=True) != 0:
        return False

    out = ROOT.TFile("output.root")
    events = out.Get("tree")
    entries = events.GetEntriesFast()
    if entries != 300:
        return False

    # we expect to see the events from run 0 twice and the ones from run 1 once.
    # So create a dictionary which contains the expected counts
    eventcount = {(0, 0, i + 1): 2 for i in range(100)}
    eventcount.update({(0, 1, i + 1): 1 for i in range(100)})
    for i in range(entries):
        events.GetEntry(i)
        e = events.EventMetaData
        key = (e.getExperiment(), e.getRun(), e.getEvent())
        # an event we did not expect at all is a failure of this check, not a
        # reason to abort all remaining checks with a KeyError
        if key not in eventcount:
            print("unexpected event in merged file:", key)
            return False
        eventcount[key] -= 1

    # every expected event has to be seen exactly as often as expected
    return all(remaining == 0 for remaining in eventcount.values())
363 
364 
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # second input is a copy of the fake_real.root file shipped with the framework tests
    fake_real = basf2.find_file("framework/tests/fake_real.root")
    copyfile(fake_real, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
370 
371 
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # the second file has Legacy_IP_Information appended to its global tags
    create_testfile_direct("test1.root", global_tag="test_globaltag")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag"
380 
381 
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # the second file has Legacy_IP_Information in the middle of its global tags
    create_testfile_direct("test1.root", global_tag="test_globaltag,other")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information,other")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == "test_globaltag,other"
390 
391 
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # no global tag in the first file, only Legacy_IP_Information in the second
    create_testfile_direct("test1.root", global_tag="")
    create_testfile_direct("test2.root", global_tag="Legacy_IP_Information")
    merged_ok = merge_files("test1.root", "test2.root") == 0
    # the metadata is only read if the merge succeeded
    return merged_ok and get_metadata().getDatabaseGlobalTag() == ""
400 
401 
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't
    affect merge_basf2_files and adapt the correct version number here."""
    # class version of FileMetaData this test was written against
    expected_version = 10
    current_version = FileMetaData.Class().GetClassVersion()
    return current_version == expected_version
407 
408 
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    # collect all module level names starting with "check_", ordered by name
    checks = sorted(item for item in globals().items() if item[0].startswith("check_"))
    failures = 0
    for name, fcn in checks:
        print(f"running {name}: {fcn.__doc__}")
        # each check is run inside its own clean_working_directory() context
        with clean_working_directory():
            if fcn():
                print(f"{name} passed")
                continue
            print(f"{name} failed")
            failures += 1

    # the number of failed checks is used as exit code
    sys.exit(failures)