# Belle II Software  light-2403-persian
# root_output_split.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2
12 from ROOT import Belle2
13 from b2test_utils import clean_working_directory, safe_process
14 import subprocess
15 import json
16 import os
17 
18 # @cond internal_test
19 
20 
class CreateDummyData(basf2.Module):
    """Fill the datastore with random chunk data of a configurable size.

    This inflates the per-event (or persistent) data volume so that output
    file splitting can be triggered with only a small number of events.
    """

    def __init__(self, size, persistent=False):
        """Configure the payload size and the durability of the store object.

        Parameters:
            size (int): approximate payload size in bytes (stored internally
                as a count of 8-byte words)
            persistent (bool): if True register the object with persistent
                durability, otherwise with per-event durability
        """
        super().__init__()
        #: number of 8-byte words to generate
        self.size = size // 8
        if persistent:
            store_durability = Belle2.DataStore.c_Persistent
        else:
            store_durability = Belle2.DataStore.c_Event
        #: accessor for the TestChunkData object in the datastore
        self.chunk_data = Belle2.PyStoreObj(Belle2.TestChunkData.Class(), store_durability)

    def initialize(self):
        """Register the chunk data object in the datastore."""
        self.chunk_data.registerInDataStore()

    def event(self):
        """Place freshly generated chunk data into the datastore."""
        self.chunk_data.assign(Belle2.TestChunkData(self.size))
35 
36 
def get_metadata(filename):
    """Return the file metadata of *filename* as a dictionary.

    Invokes the ``b2file-metadata-show`` command-line tool with ``--json``
    and parses the JSON document it prints.
    """
    completed = subprocess.run(["b2file-metadata-show", "--json", filename],
                               check=True, stdout=subprocess.PIPE)
    return json.loads(completed.stdout)
40 
41 
def check_jobfile(jobfile, expected_files):
    """Check that the job information file lists exactly the expected outputs.

    Parameters:
        jobfile (str): path to the JSON job information file written by basf2
        expected_files (set): set of output file names that must be listed

    Raises:
        AssertionError: if a listed output failed any of its checks, does not
            exist on disk, or the set of listed outputs differs from
            ``expected_files``.
    """
    with open(jobfile) as inputfile:
        data = json.load(inputfile)

    seen = set()
    for output in data['output_files']:
        seen.add(output['filename'])
        # `all()` already returns a bool, no need to compare with `is True`
        assert all(output['checks_passed']), "Output file didn't pass all tests"
        assert os.path.exists(output['filename']), "Output file doesn't exist"

    assert seen == expected_files, "didn't find all expected files"
54 
55 
if __name__ == "__main__":
    # keep the log quiet and make the generated data reproducible
    basf2.logging.log_level = basf2.LogLevel.ERROR
    basf2.logging.enable_summary(False)
    basf2.set_random_seed("something important")
    with clean_working_directory():
        # create 2 files around 3 MB
        Belle2.MetadataService.Instance().setJsonFileName("job-info1.json")
        path = basf2.Path()
        path.add_module("EventInfoSetter", evtNumList=550)
        path.add_module(CreateDummyData(1024 * 10))  # 10 kb dummy data
        # compression disabled so the file size is predictable; split after
        # roughly 3 MB of output
        path.add_module("RootOutput", outputFileName="test_split.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=3)
        assert safe_process(path) == 0, "RootOutput failed"
        check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})

        # check metadata: freshly generated files must not have any parents
        meta = [get_metadata(e) for e in ["test_split.f00000.root", "test_split.f00001.root"]]
        assert meta[0]["parents"] == [], "There should be no parents"
        assert meta[1]["parents"] == [], "There should be no parents"

        # check files and set a well known lfn
        for i in range(2):
            subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])
        assert os.path.exists("test_split.f00002.root") is False, "There should not be a third file"

        # create 3 files around 2 MB from the previous files
        Belle2.MetadataService.Instance().setJsonFileName("job-info2.json")
        path = basf2.Path()
        path.add_module("RootInput", inputFileNames=["test_split.f00000.root", "test_split.f00001.root"])
        path.add_module("RootOutput", outputFileName="file://test_parents.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=2)
        assert safe_process(path) == 0, "RootInput/Output failed"

        check_jobfile("job-info2.json", {f"test_parents.f0000{i}.root" for i in range(3)})
        assert os.path.exists("test_split.f00003.root") is False, "There should not be a fourth file"

        # check metadata: each output file must list as parents exactly the
        # lfns of the input files it contains events from (the middle file
        # straddles the boundary between the two inputs)
        meta = [get_metadata(e) for e in ["test_parents.f00000.root", "test_parents.f00001.root", "test_parents.f00002.root"]]
        assert meta[0]["parents"] == ["parent0"], "parents wrong"
        assert meta[1]["parents"] == ["parent0", "parent1"], "parents wrong"
        assert meta[2]["parents"] == ["parent1"], "parents wrong"

        # event ranges of consecutive outputs must be contiguous and
        # consistent with the per-file event counts
        last = 0
        for m in meta:
            assert m["eventLow"] == last + 1, "eventLow is not correct"
            last = m["eventHigh"]
            assert m["eventLow"] + m["nEvents"] - 1 == m["eventHigh"], "event high is inconsistent"
            assert m["mcEvents"] == 0, "MC events cannot be saved"

        # no events may be lost by splitting
        assert sum(e["nEvents"] for e in meta) == 550, "number of events must be correct"

        # check what happens with other extensions
        check_filenames = {
            # no extension: just add one
            "test_noext": "test_noext.f00000.root",
            # any other extension: replace
            "test_otherext.foo": "test_otherext.f00000.root",
            # but keep parameters or anchors in urls untouched. TFile::Open ignores
            # them for file:// urls but they are printed on the command line
            "file://test_param?foo=bar": "test_param.f00000.root",
            "file://test_anchor#foo": "test_anchor.f00000.root",
        }
        basf2.logging.log_level = basf2.LogLevel.INFO
        Belle2.MetadataService.Instance().setJsonFileName("")
        for name, result in check_filenames.items():
            with clean_working_directory():
                path = basf2.Path()
                path.add_module("EventInfoSetter")
                path.add_module(CreateDummyData(10, True))  # 10 byte persistent dummy data
                path.add_module("RootOutput", outputFileName=name, outputSplitSize=1, updateFileCatalog=False)
                safe_process(path)
                assert os.listdir() == [result], "wrong output file name"
128 
129 # @endcond
# NOTE(review): the following lines are doxygen cross-reference residue from
# extraction, not part of the script; kept as comments for reference.
# static MetadataService & Instance() — static method to get a reference to the MetadataService instance.
# PyStoreObj (PyStoreObj.h:67) — a (simplified) python wrapper for StoreObjPtr.
# TestChunkData (TestChunkData.h:22) — storable object which can be filled with random chunk data of a certain size.