Belle II Software development
root_output_split.py
1#!/usr/bin/env python3
2
3
10
11import basf2
12from ROOT import Belle2
13from b2test_utils import clean_working_directory, safe_process
14import subprocess
15import json
16import os
17
18# @cond internal_test
19
20
class CreateDummyData(basf2.Module):
    """Module writing a ``TestChunkData`` payload so that events are not too small"""

    def __init__(self, size, persistent=False):
        """Store the payload size and prepare the accessor to the store object.

        Arguments:
            size: requested size of the dummy data. It is integer-divided by 8
                before being passed to ``TestChunkData`` (presumably because
                the payload is counted in 8 byte words -- TODO confirm).
            persistent: if True the object is created with ``c_Persistent``
                durability, otherwise with ``c_Event`` durability.
        """
        super().__init__()
        # value handed to the TestChunkData constructor in event()
        self.size = size // 8
        if persistent:
            store_durability = Belle2.DataStore.c_Persistent
        else:
            store_durability = Belle2.DataStore.c_Event
        # accessor used to register and fill the dummy data object
        self.chunk_data = Belle2.PyStoreObj(Belle2.TestChunkData.Class(), store_durability)

    def initialize(self):
        """Register the dummy data object in the data store"""
        self.chunk_data.registerInDataStore()

    def event(self):
        """Assign a freshly created ``TestChunkData`` of the configured size"""
        self.chunk_data.assign(Belle2.TestChunkData(self.size))
35
36
def get_metadata(filename):
    """Return the parsed JSON output of ``b2file-metadata-show --json`` for a file.

    Arguments:
        filename: name of the file passed to ``b2file-metadata-show``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    command = ["b2file-metadata-show", "--json", filename]
    return json.loads(subprocess.check_output(command))
40
41
def check_jobfile(jobfile, expected_files):
    """Verify the output files listed in a job information file.

    Every entry in ``output_files`` must have only truthy values in
    ``checks_passed`` and its ``filename`` must exist on disk. In addition the
    set of all listed file names has to be equal to ``expected_files``.

    Arguments:
        jobfile: name of the JSON job information file to read.
        expected_files: set of file names which should be listed in the file.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    with open(jobfile) as job_json:
        job_info = json.load(job_json)

    found = set()
    for entry in job_info['output_files']:
        name = entry['filename']
        found.add(name)
        assert all(entry['checks_passed']), "Output file didn't pass all tests"
        assert os.path.exists(name), "Output file doesn't exist"

    assert found == expected_files, "didn't find all expected files"
54
55
if __name__ == "__main__":
    # Test of the RootOutput file splitting: write split files, read them back
    # in and split them again, then verify job information and file metadata.
    basf2.logging.log_level = basf2.LogLevel.ERROR
    basf2.logging.enable_summary(False)
    basf2.set_random_seed("something important")
    with clean_working_directory():
        # create 2 files around 3 MB
        Belle2.MetadataService.Instance().setJsonFileName("job-info1.json")
        path = basf2.Path()
        path.add_module("EventInfoSetter", evtNumList=550)
        path.add_module(CreateDummyData(1024 * 10))  # 10 kb dummy data
        # compression is switched off when writing the split files
        path.add_module("RootOutput", outputFileName="test_split.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=3)
        assert safe_process(path) == 0, "RootOutput failed"
        check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})

        # check metadata
        meta = [get_metadata(e) for e in ["test_split.f00000.root", "test_split.f00001.root"]]
        assert meta[0]["parents"] == [], "There should be no parents"
        assert meta[1]["parents"] == [], "There should be no parents"

        # check files and set a well known lfn
        for i in range(2):
            subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])
        assert not os.path.exists("test_split.f00002.root"), "There should not be a third file"

        # create 3 files around 2 MB from the previous files
        Belle2.MetadataService.Instance().setJsonFileName("job-info2.json")
        path = basf2.Path()
        path.add_module("RootInput", inputFileNames=["test_split.f00000.root", "test_split.f00001.root"])
        path.add_module("RootOutput", outputFileName="file://test_parents.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=2)
        assert safe_process(path) == 0, "RootInput/Output failed"

        check_jobfile("job-info2.json", {f"test_parents.f0000{i}.root" for i in range(3)})
        # the files of this second job are called test_parents.*, so this is
        # the name a fourth file would have
        assert not os.path.exists("test_parents.f00003.root"), "There should not be a fourth file"

        # check metadata: the second output file is expected to list both
        # input files as parents, the other two only one each
        meta = [get_metadata(e) for e in ["test_parents.f00000.root", "test_parents.f00001.root", "test_parents.f00002.root"]]
        assert meta[0]["parents"] == ["parent0"], "parents wrong"
        assert meta[1]["parents"] == ["parent0", "parent1"], "parents wrong"
        assert meta[2]["parents"] == ["parent1"], "parents wrong"

        # the event ranges of consecutive files have to follow each other
        # without gaps, starting at event 1
        last = 0
        for m in meta:
            assert m["eventLow"] == last + 1, "eventLow is not correct"
            last = m["eventHigh"]
            assert m["eventLow"] + m["nEvents"] - 1 == m["eventHigh"], "event high is inconsistent"
            assert m["mcEvents"] == 0, "MC events cannot be saved"

        assert sum(e["nEvents"] for e in meta) == 550, "number of events must be correct"

    # check what happens with other extensions
    check_filenames = {
        # no extension: just add one
        "test_noext": "test_noext.f00000.root",
        # any other extension: replace
        "test_otherext.foo": "test_otherext.f00000.root",
        # but keep parameters or anchors in urls untouched. TFile::Open ignores
        # them for file:// urls but they are printed on the command line
        "file://test_param?foo=bar": "test_param.f00000.root",
        "file://test_anchor#foo": "test_anchor.f00000.root",
    }
    basf2.logging.log_level = basf2.LogLevel.INFO
    # no job information file is requested for the file name checks
    Belle2.MetadataService.Instance().setJsonFileName("")
    for name, result in check_filenames.items():
        # a fresh working directory per name so that the directory listing
        # contains nothing but the file written by this job
        with clean_working_directory():
            path = basf2.Path()
            path.add_module("EventInfoSetter")
            path.add_module(CreateDummyData(10, True))  # 10 byte persistent dummy data
            path.add_module("RootOutput", outputFileName=name, outputSplitSize=1, updateFileCatalog=False)
            safe_process(path)
            assert os.listdir() == [result], "wrong output file name"
128
129# @endcond
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
Storable object which can be filled with random chunk data of a certain size.
Definition: TestChunkData.h:22