Belle II Software  release-06-02-00
root_output_split.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2
12 from ROOT import Belle2
13 from b2test_utils import clean_working_directory, safe_process
14 import subprocess
15 import json
16 import os
17 
18 # @cond internal_test
19 
20 
class CreateDummyData(basf2.Module):
    """Create some random data to have event size not be too small"""

    def __init__(self, size, persistent=False):
        """Prepare creation of chunk data of roughly ``size`` bytes.

        Parameters:
            size (int): approximate payload size in bytes; TestChunkData
                counts in 8-byte words, so this is divided by 8
            persistent (bool): if True register the object with persistent
                durability, otherwise with event durability
        """
        super().__init__()
        # TestChunkData is sized in 64-bit words, so convert bytes -> words
        self.size = size // 8
        store_durability = (Belle2.DataStore.c_Persistent if persistent
                            else Belle2.DataStore.c_Event)
        self.chunk_data = Belle2.PyStoreObj(Belle2.TestChunkData.Class(), store_durability)

    def initialize(self):
        """Register the chunk data object in the datastore"""
        self.chunk_data.registerInDataStore()

    def event(self):
        """Fill the datastore object with freshly generated chunk data"""
        self.chunk_data.assign(Belle2.TestChunkData(self.size))
35 
36 
def get_metadata(filename):
    """Return the file metadata of ``filename`` as a dictionary.

    Runs ``b2file-metadata-show --json`` on the file and parses its output.
    """
    raw_output = subprocess.check_output(["b2file-metadata-show", "--json", filename])
    return json.loads(raw_output)
40 
41 
def check_jobfile(jobfile, expected_files):
    """Check that all output files are in the jobfile.

    Parameters:
        jobfile (str): name of the json job-information file written by the
            MetadataService
        expected_files (set): set of file names that must appear as outputs

    Raises:
        AssertionError: if an output failed one of its checks, does not exist
            on disk, or the set of listed outputs differs from ``expected_files``
    """
    with open(jobfile) as inputfile:
        data = json.load(inputfile)

    seen = set()
    for output in data['output_files']:
        seen.add(output['filename'])
        # all() already returns a bool; comparing it to True with `is` is redundant
        assert all(output['checks_passed']), "Output file didn't pass all tests"
        assert os.path.exists(output['filename']), "Output file doesn't exist"

    assert seen == expected_files, "didn't find all expected files"
54 
55 
if __name__ == "__main__":
    # keep the output quiet and the random data reproducible
    basf2.logging.log_level = basf2.LogLevel.ERROR
    basf2.logging.enable_summary(False)
    basf2.set_random_seed("something important")
    with clean_working_directory():
        # create 2 files around 3 MB
        Belle2.MetadataService.Instance().setJsonFileName("job-info1.json")
        path = basf2.Path()
        path.add_module("EventInfoSetter", evtNumList=550)
        path.add_module(CreateDummyData(1024 * 10))  # 10 kb dummy data
        path.add_module("RootOutput", outputFileName="test_split.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=3)
        assert safe_process(path) == 0, "RootOutput failed"
        check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})

        # check files and set a well known lfn
        for i in range(2):
            subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])
        assert os.path.exists("test_split.f00002.root") is False, "There should not be a third file"

        # create 3 files around 2 MB from the previous files
        Belle2.MetadataService.Instance().setJsonFileName("job-info2.json")
        path = basf2.Path()
        path.add_module("RootInput", inputFileNames=["test_split.f00000.root", "test_split.f00001.root"])
        path.add_module("RootOutput", outputFileName="file://test_parents.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=2)
        assert safe_process(path) == 0, "RootInput/Output failed"

        check_jobfile("job-info2.json", {f"test_parents.f0000{i}.root" for i in range(3)})
        assert os.path.exists("test_split.f00003.root") is False, "There should not be a fourth file"

        # check metadata: the middle output overlaps both inputs, the outer
        # two each descend from exactly one parent
        metadata = [get_metadata(e) for e in ["test_parents.f00000.root", "test_parents.f00001.root", "test_parents.f00002.root"]]
        assert metadata[0]["parents"] == ["parent0"], "parents wrong"
        assert metadata[1]["parents"] == ["parent0", "parent1"], "parents wrong"
        assert metadata[2]["parents"] == ["parent1"], "parents wrong"

        # event ranges of consecutive files must join up seamlessly
        previous_high = 0
        for file_meta in metadata:
            assert file_meta["eventLow"] == previous_high + 1, "eventLow is not correct"
            previous_high = file_meta["eventHigh"]
            assert file_meta["eventLow"] + file_meta["nEvents"] - 1 == file_meta["eventHigh"], "event high is inconsistent"
            assert file_meta["mcEvents"] == 0, "MC events cannot be saved"

        assert sum(e["nEvents"] for e in metadata) == 550, "number of events must be correct"

        # check what happens with other extensions
        check_filenames = {
            # no extension: just add one
            "test_noext": "test_noext.f00000.root",
            # any other extension: replace
            "test_otherext.foo": "test_otherext.f00000.root",
            # but keep parameters or anchors in urls untouched. TFile::Open ignores
            # them for file:// urls but they are printed on the command line
            "file://test_param?foo=bar": "test_param.f00000.root",
            "file://test_anchor#foo": "test_anchor.f00000.root",
        }
        basf2.logging.log_level = basf2.LogLevel.INFO
        Belle2.MetadataService.Instance().setJsonFileName("")
        for output_name, expected_file in check_filenames.items():
            with clean_working_directory():
                path = basf2.Path()
                path.add_module("EventInfoSetter")
                path.add_module(CreateDummyData(10, True))  # 10 byte persistent dummy data
                path.add_module("RootOutput", outputFileName=output_name, outputSplitSize=1, updateFileCatalog=False)
                safe_process(path)
                assert os.listdir() == [expected_file], "wrong output file name"
124 # @endcond
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
A (simplified) Python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
Storable object which can be filled with random chunk data of a certain size.
Definition: TestChunkData.h:22