Belle II Software  release-05-01-25
merge_files.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import os
5 import sys
6 import re
7 import subprocess
8 import itertools
9 from shutil import copyfile
10 import ROOT
11 from ROOT.Belle2 import FileMetaData, EventMetaData
12 # we don't really need basf2 but it fixes the print buffering problem
13 import basf2
14 from b2test_utils import clean_working_directory, skip_test_if_light
15 
16 
def create_testfile(name, release=None, exp=0, run=0, events=100, branchNames=None, **argk):
    """Create a test file by running basf2 on the ``testfile_steering`` script

    Parameters:
      name: name of the output file, passed to basf2 via ``-o``. It is also
        used to name the temporary steering file ``steering-<name>.py``
      release: accepted for call compatibility, not used by this function
      exp: experiment number, passed via ``--experiment``
      run: run number, passed via ``--run``
      events: number of events, passed via ``-n``
      branchNames: optional list of extra command line arguments appended
        after the steering file name; the steering script hands its
        ``sys.argv[1:]`` to the RootOutput module as ``branchNames``
      argk: additional environment variables for the basf2 subprocess
    """
    # None instead of a shared mutable list as default argument
    extra_arguments = [] if branchNames is None else list(branchNames)

    # run basf2 with a copy of our environment plus the requested overrides
    env = dict(os.environ)
    env.update(argk)

    steering_file = "steering-{0}.py".format(name)
    with open(steering_file, "w") as f:
        f.write(testfile_steering)

    # the exit code is deliberately not checked, the checks look at the
    # result of the subsequent merge instead
    subprocess.call(["basf2", "-o", name, "--experiment", str(exp), "--run", str(run),
                     "-n", str(events), steering_file] + extra_arguments, env=env)
29 
30 
def create_testfile_direct(name, metadata=None, release="test_release", user="test_user", seed=None,
                           site="test_site", global_tag="test_globaltag", steering="test_steering"):
    """Write a minimal test file by hand, without running basf2, for full
    control over the FileMetaData

    Parameters:
      name: name of the ROOT file to create, also used as LFN
      metadata: FileMetaData instance to store, a new one is created if None
      release: release name stored in the creation data
      user: user name stored in the creation data
      seed: random seed to store, defaults to ``<name>-seed``. The seed is
        always written, so a seed already set on ``metadata`` is replaced
      site: site name stored in the creation data
      global_tag: database global tag to store
      steering: steering file content to store
    """
    if metadata is None:
        metadata = FileMetaData()
    if seed is None:
        seed = name + "-seed"

    # seed is never None at this point so it can be set unconditionally
    metadata.setRandomSeed(seed)
    metadata.setLfn(name)
    metadata.setCreationData("the most auspicious of days for testing", site, user, release)
    metadata.setDatabaseGlobalTag(global_tag)
    metadata.setSteering(steering)

    output_file = ROOT.TFile(name, "RECREATE")
    # persistent tree with exactly one entry holding the file metadata
    persistent_tree = ROOT.TTree("persistent", "persistent")
    persistent_tree.Branch("FileMetaData", metadata)
    persistent_tree.Fill()
    persistent_tree.Write()
    # event tree with a single, default constructed EventMetaData entry
    event_tree = ROOT.TTree("tree", "tree")
    event_meta = ROOT.Belle2.EventMetaData()
    event_tree.Branch("EventMetaData", event_meta)
    event_tree.Fill()
    event_tree.Write()
    output_file.Close()
57 
58 
def get_metadata(name="output.root"):
    """Return the FileMetaData found in the persistent tree of a ROOT file

    Parameters:
      name: name of the ROOT file to read
    """
    root_file = ROOT.TFile(name)
    persistent_tree = root_file.Get("persistent")
    # load the first entry so the FileMetaData branch can be accessed
    persistent_tree.GetEntry(0)
    stored = persistent_tree.FileMetaData
    # hand back a newly constructed FileMetaData built from the stored one
    return FileMetaData(stored)
65 
66 
def merge_files(*args, output="output.root", filter_modified=False):
    """Run ``b2file-merge`` and echo its output on our own stdout

    Parameters:
      args: input file names and/or additional command line options, placed
        after the output file name on the command line
      output: name of the output file
      filter_modified: if True omit warnings that the release is modified and
        consistency cannot be checked

    Returns:
      the exit code of the b2file-merge process
    """
    process = subprocess.run(["b2file-merge", "-q", output] + list(args), stdout=subprocess.PIPE)
    merge_output = process.stdout
    # do we want to filter the modified release warning?
    if filter_modified:
        # Raw bytes literals hand the regex escapes (\[ \] \- \n) to ``re``
        # untouched. In a normal literal these are invalid string escapes
        # which only work by accident and trigger a warning.
        merge_output = re.sub(
            rb'^\[WARNING\] File "(.*?)" created with modified software ([a-zA-Z0-9\-+]*?): '
            rb"cannot verify that files are compatible\n",
            b"", merge_output, flags=re.MULTILINE)

    # in any case print the (possibly filtered) output as raw bytes
    sys.stdout.buffer.write(merge_output)
    sys.stdout.buffer.flush()
    # and return the exit code
    return process.returncode
87 
88 
89 
# Steering script which create_testfile() writes to disk and runs with basf2.
# It optionally picks up the global tag and the random seed from the
# BELLE2_GLOBALTAG and BELLE2_SEED environment variables and passes its command
# line arguments to the RootOutput module as branchNames.
testfile_steering = """
import os
import sys
import basf2
basf2.set_log_level(basf2.LogLevel.ERROR)
if "BELLE2_GLOBALTAG" in os.environ:
    basf2.reset_database()
    basf2.use_central_database(os.environ["BELLE2_GLOBALTAG"])
if "BELLE2_SEED" in os.environ:
    basf2.set_random_seed(os.environ["BELLE2_SEED"])
main = basf2.create_path()
main.add_module("EventInfoSetter")
main.add_module("ParticleGun")
main.add_module("RootOutput", branchNames=sys.argv[1:])
basf2.process(main)
"""
106 
107 
def check_01_existing():
    """Check that merging a non exsiting file fails"""
    # only test2.root gets created, test1.root is deliberately missing
    create_testfile_direct("test2.root")
    missing_rejected = merge_files("test1.root") != 0
    # the existing file is only tried if the missing one was rejected
    return missing_rejected and merge_files("test2.root") == 0
112 
113 
def check_02_nonroot():
    """Check that merging fails on non-root input files"""
    # plain text content behind a .root file name
    with open("test1.root", "w") as fake_root:
        fake_root.write("This is not a ROOT file")
    exit_code = merge_files("test1.root")
    return exit_code != 0
119 
120 
def check_03_overwrite():
    """Check that overwriting fails if -f is missing"""
    create_testfile_direct("test1.root")
    # make sure the default output file exists before merging
    with open("output.root", "w") as existing_output:
        existing_output.write("stuff")
    if merge_files("test1.root") == 0:
        # the existing output was replaced without -f
        return False
    # with -f the very same merge has to succeed
    return merge_files("-f", "test1.root") == 0
127 
128 
def check_04_access():
    """Check that it fails if we cannot create output file"""
    create_testfile_direct("test1.root")
    # the directory part of the output name is never created by this check
    exit_code = merge_files("test1.root", output="nosuchdir/foo")
    return exit_code != 0
133 
134 
def check_05_release():
    """Check that it fails if the releases are different"""
    inputs = ("test1.root", "test2.root")
    # first file uses the default release, second one a different release
    create_testfile_direct(inputs[0])
    create_testfile_direct(inputs[1], release="other_release")
    exit_code = merge_files(*inputs)
    return exit_code != 0
140 
141 
def check_06_empty_release():
    """Check that merging fails with empty release valuses"""
    inputs = ("test1.root", "test2.root")
    # first file uses the default release, second one an empty release
    create_testfile_direct(inputs[0])
    create_testfile_direct(inputs[1], release="")
    exit_code = merge_files(*inputs)
    return exit_code != 0
147 
148 
def check_07_modified_release():
    """Check that merging modified release gives warning about that but merging should work"""
    # same release name, once plain and once with the "-modified" suffix
    releases = {"test1.root": "test_release", "test2.root": "test_release-modified"}
    for filename in ("test1.root", "test2.root"):
        create_testfile_direct(filename, release=releases[filename])
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code == 0
154 
155 
def check_08_duplicate_seed():
    """Check that we get a warning for identical seeds but merging should work"""
    inputs = ("test1.root", "test2.root")
    # both files get the very same random seed
    for filename in inputs:
        create_testfile_direct(filename, seed="seed1")
    exit_code = merge_files(*inputs)
    return exit_code == 0
161 
162 
def check_09_different_steering():
    """Check that merging fails if the steering file is different"""
    inputs = ("test1.root", "test2.root")
    # first file uses the default steering content, second one its own
    create_testfile_direct(inputs[0])
    create_testfile_direct(inputs[1], steering="my other steering")
    exit_code = merge_files(*inputs)
    return exit_code != 0
168 
169 
def check_10_different_globaltag():
    """Check that merging fails if the global tag is different"""
    inputs = ("test1.root", "test2.root")
    # first file uses the default global tag, second one a different tag
    create_testfile_direct(inputs[0])
    create_testfile_direct(inputs[1], global_tag="other_globaltag")
    exit_code = merge_files(*inputs)
    return exit_code != 0
175 
176 
def check_11_branches():
    """Check that merging fails if the branches in the event tree are different"""
    # first file without extra arguments, second one restricted to EventMetaData
    create_testfile("test1.root")
    create_testfile("test2.root", branchNames=["EventMetaData"])
    # drop the modified release warnings from the echoed output
    exit_code = merge_files("test1.root", "test2.root", filter_modified=True)
    return exit_code != 0
182 
183 
def check_12_hadded():
    """Check that merging fails if the file has more then one entry in the persistent tree"""
    create_testfile_direct("test1.root")
    # combine the file with itself using plain hadd
    hadd_command = ["hadd", "test11.root", "test1.root", "test1.root"]
    subprocess.call(hadd_command)
    exit_code = merge_files("test11.root")
    return exit_code != 0
189 
190 
def check_13_nopersistent():
    """Check that merging fails without persistent tree"""
    # the file only contains an empty event tree
    root_file = ROOT.TFile("test1.root", "RECREATE")
    event_tree = ROOT.TTree("tree", "tree")
    event_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
198 
199 
def check_14_noeventtree():
    """Check that merging fails without event tree"""
    # the file only contains the persistent tree with default FileMetaData
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent_tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    persistent_tree.Branch("FileMetaData", file_meta)
    persistent_tree.Fill()
    persistent_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
210 
211 
def check_15_noeventbranches():
    """Check that merging fails without event tree"""
    # In contrast to check_14 the event tree does exist here, but it is
    # written without any branches.
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent_tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    persistent_tree.Branch("FileMetaData", file_meta)
    persistent_tree.Fill()
    persistent_tree.Write()
    event_tree = ROOT.TTree("tree", "tree")
    event_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
225 
226 
def check_16_nonmergeable():
    """Check that merging fails it there a ron mergeable persistent trees"""
    root_file = ROOT.TFile("test1.root", "RECREATE")
    persistent_tree = ROOT.TTree("persistent", "persistent")
    file_meta = FileMetaData()
    file_meta.setCreationData("date", "site", "user", "release")
    persistent_tree.Branch("FileMetaData", file_meta)
    # second branch in the persistent tree next to the FileMetaData
    persistent_tree.Branch("AnotherMetaData", file_meta)
    persistent_tree.Fill()
    persistent_tree.Write()
    event_tree = ROOT.TTree("tree", "tree")
    # the same FileMetaData object is used to fill the EventMetaData branch
    event_tree.Branch("EventMetaData", file_meta)
    event_tree.Fill()
    event_tree.Write()
    root_file.Close()
    exit_code = merge_files("test1.root")
    return exit_code != 0
243 
244 
def check_17_checkparentLFN():
    """Check that parent LFN get merged correctly"""
    parents = [("a", "b", "c"), ("a", "c", "d")]
    # one metadata object and one parent vector per input file; the vectors
    # are kept in a list so they stay alive until the files are written
    metadata = [FileMetaData(), FileMetaData()]
    vectors = [ROOT.std.vector("std::string")(), ROOT.std.vector("std::string")()]
    for vector, lfns in zip(vectors, parents):
        for lfn in lfns:
            vector.push_back(lfn)
    for meta, vector in zip(metadata, vectors):
        meta.setParents(vector)
    for number, meta in enumerate(metadata, 1):
        meta.setRandomSeed(str(number))
    for number, meta in enumerate(metadata, 1):
        create_testfile_direct("test%d.root" % number, meta)
    merge_files("test1.root", "test2.root")
    merged = get_metadata()
    # every parent is expected exactly once and in sorted order
    should_be = sorted(set(parents[0]).union(parents[1]))
    is_actual = []
    for index in range(merged.getNParents()):
        is_actual.append(merged.getParent(index))
    return should_be == is_actual
267 
268 
def check_18_checkEventNr():
    """Check that event and mc numbers are summed correctly"""
    event_counts = [10, 1243, 232, 1272, 25]
    mc_counts = [120, 821, 23, 923, 1]
    filenames = []
    # one input file per (events, mc events) pair
    for number, counts in enumerate(zip(event_counts, mc_counts)):
        n_events, n_mc = counts
        file_meta = FileMetaData()
        file_meta.setNEvents(n_events)
        file_meta.setMcEvents(n_mc)
        file_meta.setRandomSeed(str(number))
        filename = "test{0}.root".format(number)
        filenames.append(filename)
        create_testfile_direct(filename, file_meta)
    merge_files(*filenames)
    merged = get_metadata()
    if sum(event_counts) != merged.getNEvents():
        return False
    return sum(mc_counts) == merged.getMcEvents()
284 
285 
def check_19_lowhigh():
    """Check that the low/high event numbers are merged correctly"""
    # (experiment, run, event) triples, one per input file. The first triple
    # belongs to the only file which is declared to contain zero events.
    no_events = (-1, -1, 0)
    boundaries = [
        no_events,
        (0, 0, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 0, 0),
        (1, 1, 1),
    ]
    filenames = []
    for number, boundary in enumerate(boundaries):
        file_meta = FileMetaData()
        file_meta.setNEvents(0 if boundary == no_events else 1)
        file_meta.setRandomSeed(str(number))
        # low and high are identical for each single file
        file_meta.setLow(*boundary)
        file_meta.setHigh(*boundary)
        filenames.append("test{0}.root".format(number))
        create_testfile_direct(filenames[-1], file_meta)

    # test all possible combinations taking 2 elements from the list plus the
    # full list in one go
    all_files = range(len(filenames))
    selections = list(itertools.permutations(all_files, 2))
    selections.append(all_files)
    for selection in selections:
        # the file without events must not contribute to the expectation
        filled = [boundaries[i] for i in selection if boundaries[i] != no_events]
        expected_low = min(filled)
        expected_high = max(filled)
        exit_code = merge_files("-f", "--no-catalog", *[filenames[i] for i in selection])
        if exit_code != 0:
            return False
        merged = get_metadata()
        actual_low = (merged.getExperimentLow(), merged.getRunLow(), merged.getEventLow())
        if actual_low != expected_low:
            print("low event should be", expected_low)
            merged.Print()
            return False
        actual_high = (merged.getExperimentHigh(), merged.getRunHigh(), merged.getEventHigh())
        if actual_high != expected_high:
            print("high event should be", expected_high)
            merged.Print()
            return False
    return True
325 
326 
def check_20_test_file():
    """Check that a merged file passes the b2file-check program"""
    # 1111 + 123 events are expected to show up as 1234 in the merged file
    for filename, n_events in (("test1.root", 1111), ("test2.root", 123)):
        create_testfile(filename, events=n_events)
    merge_files("test1.root", "test2.root", filter_modified=True)
    check_command = ["b2file-check", "-n", "1234", "--mcevents", "1234", "output.root"]
    # names which are handed to b2file-check after the file name
    check_command += ["EventMetaData", "MCParticles"]
    exit_code = subprocess.call(check_command)
    return exit_code == 0
334 
335 
def check_21_eventmetadata():
    """Check that merged files has all the correct even infos"""
    create_testfile("test1.root", run=0, events=100, BELLE2_SEED="test1", BELLE2_USER="user1")
    create_testfile("test2.root", run=1, events=100, BELLE2_SEED="test2", BELLE2_USER="user2")
    # test1.root is passed twice on purpose
    merge_files("test1.root", "test2.root", "test1.root", filter_modified=True)
    merged_file = ROOT.TFile("output.root")
    event_tree = merged_file.Get("tree")
    n_entries = event_tree.GetEntriesFast()
    if n_entries != 300:
        return False
    # we expect to see the events from run 0 twice and the ones from run 1 once.
    # So count down from the expected numbers, keyed by (experiment, run, event)
    remaining = {}
    for event in range(1, 101):
        remaining[(0, 0, event)] = 2
        remaining[(0, 1, event)] = 1
    for index in range(n_entries):
        event_tree.GetEntry(index)
        info = event_tree.EventMetaData
        key = (info.getExperiment(), info.getRun(), info.getEvent())
        # an unexpected event number raises a KeyError here
        remaining[key] -= 1
    # every counter has to end up at exactly zero
    return set(remaining.values()) == {0}
355 
356 
def check_22_real_mc():
    """Check that merging fails if real and MC data are mixed"""
    create_testfile_direct("test1.root")
    # the second input is a copy of a file shipped with the framework tests
    fake_real = basf2.find_file("framework/tests/fake_real.root")
    copyfile(fake_real, "test2.root")
    exit_code = merge_files("test1.root", "test2.root")
    return exit_code != 0
362 
363 
def check_23_legacy_ip():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information is appended at the end for the second file only
    create_testfile_direct("test1.root", global_tag="test_globaltag")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information")
    exit_code = merge_files("test1.root", "test2.root")
    if exit_code != 0:
        return False
    merged_tag = get_metadata().getDatabaseGlobalTag()
    return merged_tag == "test_globaltag"
372 
373 
def check_24_legacy_ip_middle():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # Legacy_IP_Information sits between two other tags for the second file
    create_testfile_direct("test1.root", global_tag="test_globaltag,other")
    create_testfile_direct("test2.root", global_tag="test_globaltag,Legacy_IP_Information,other")
    exit_code = merge_files("test1.root", "test2.root")
    if exit_code != 0:
        return False
    merged_tag = get_metadata().getDatabaseGlobalTag()
    return merged_tag == "test_globaltag,other"
382 
383 
def check_25_legacy_ip_only():
    """Check that we can merge if the Legacy_IP_Information is inconsistent"""
    # empty global tag versus Legacy_IP_Information as the one and only tag
    create_testfile_direct("test1.root", global_tag="")
    create_testfile_direct("test2.root", global_tag="Legacy_IP_Information")
    exit_code = merge_files("test1.root", "test2.root")
    if exit_code != 0:
        return False
    merged_tag = get_metadata().getDatabaseGlobalTag()
    return merged_tag == ""
392 
393 
def check_XX_filemetaversion():
    """Check that the Version of the FileMetaData hasn't changed.
    If this check fails please check that the changes to FileMetaData don't
    affect merge_basf2_files and adapt the correct version number here."""
    # class version of FileMetaData which the checks in this file were written for
    expected_version = 10
    actual_version = FileMetaData.Class().GetClassVersion()
    return actual_version == expected_version
399 
400 
if __name__ == "__main__":
    skip_test_if_light()  # light builds don't have particle gun
    failures = 0
    # Run everything in this module whose name starts with "check_" in
    # alphabetical order. sorted() takes a snapshot of the globals, so the
    # loop variables can safely be created while iterating.
    for name, fcn in sorted(globals().items()):
        if not name.startswith("check_"):
            continue
        print("running %s: %s" % (name, fcn.__doc__))
        # each check runs inside its own clean_working_directory() context
        with clean_working_directory():
            passed = fcn()
            if passed:
                print("%s passed" % name)
            else:
                print("%s failed" % name)
                failures += 1

    # the number of failed checks is the exit code of the script
    sys.exit(failures)
merge_files
Definition: merge_files.py:1