Belle II Software  release-05-02-19
root_output_split.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import basf2
5 from ROOT import Belle2
6 from b2test_utils import clean_working_directory, safe_process
7 import subprocess
8 import json
9 import os
10 
11 # @cond internal_test
12 
13 
class CreateDummyData(basf2.Module):
    """Module filling a TestChunkData object in every event so events are not too small"""

    def __init__(self, size, persistent=False):
        """Remember the payload size and prepare the datastore accessor.

        Arguments:
            size: requested amount of dummy data; callers in this file pass a
                byte count. It is divided by 8 before being handed to
                TestChunkData (presumably because the chunk is made of 8-byte
                elements -- TODO confirm against TestChunkData).
            persistent: if True use persistent durability for the object,
                otherwise event durability.
        """
        super().__init__()
        self.size = size // 8
        if persistent:
            durability = Belle2.DataStore.c_Persistent
        else:
            durability = Belle2.DataStore.c_Event
        chunk_class = Belle2.TestChunkData.Class()
        self.chunk_data = Belle2.PyStoreObj(chunk_class, durability)

    def initialize(self):
        """Register the dummy data object in the datastore"""
        self.chunk_data.registerInDataStore()

    def event(self):
        """Assign a freshly created chunk of the configured size"""
        chunk = Belle2.TestChunkData(self.size)
        self.chunk_data.assign(chunk)
28 
29 
def get_metadata(filename):
    """Return the file metadata of ``filename`` as decoded JSON.

    Runs ``b2file-metadata-show --json`` on the file and parses what the tool
    prints. ``subprocess.CalledProcessError`` is raised if the tool exits with
    a non-zero status.
    """
    command = ["b2file-metadata-show", "--json", filename]
    return json.loads(subprocess.check_output(command))
33 
34 
def check_jobfile(jobfile, expected_files):
    """Check that all output files are in the jobfile

    Arguments:
        jobfile: path of the JSON job information file to read
        expected_files: set of file names which has to be equal to the set of
            ``filename`` entries found under ``output_files``

    Raises:
        AssertionError: if an entry has a false value in ``checks_passed``, if
            a listed file does not exist, or if the set of listed files differs
            from ``expected_files``
    """
    # JSON is UTF-8 encoded, so don't depend on the locale's default encoding
    with open(jobfile, encoding="utf-8") as inputfile:
        data = json.load(inputfile)

    seen = set()
    for output in data['output_files']:
        seen.add(output['filename'])
        assert all(output['checks_passed']), "Output file didn't pass all tests"
        assert os.path.exists(output['filename']), "Output file doesn't exist"

    assert seen == expected_files, "didn't find all expected files"
47 
48 
if __name__ == "__main__":
    # Test of the RootOutput file splitting: write split files, read them back
    # into differently sized split files, verify the resulting metadata and
    # finally check how the split file names are derived from the output name.
    basf2.logging.log_level = basf2.LogLevel.ERROR
    basf2.logging.enable_summary(False)
    basf2.set_random_seed("something important")
    with clean_working_directory():
        # create 2 files around 3 MB
        Belle2.MetadataService.Instance().setJsonFileName("job-info1.json")
        path = basf2.Path()
        path.add_module("EventInfoSetter", evtNumList=550)
        path.add_module(CreateDummyData(1024 * 10))  # 10 kb dummy data
        path.add_module("RootOutput", outputFileName="test_split.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=3)
        assert safe_process(path) == 0, "RootOutput failed"
        check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})

        # check files and set a well known lfn
        for i in range(2):
            subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])
        assert not os.path.exists("test_split.f00002.root"), "There should not be a third file"

        # create 3 files around 2 MB from the previous files
        Belle2.MetadataService.Instance().setJsonFileName("job-info2.json")
        path = basf2.Path()
        path.add_module("RootInput", inputFileNames=["test_split.f00000.root", "test_split.f00001.root"])
        path.add_module("RootOutput", outputFileName="file://test_parents.root", buildIndex=False, updateFileCatalog=False,
                        compressionAlgorithm=0, compressionLevel=0, outputSplitSize=2)
        assert safe_process(path) == 0, "RootInput/Output failed"

        check_jobfile("job-info2.json", {f"test_parents.f0000{i}.root" for i in range(3)})
        # this job writes test_parents.*, so that is the name a surplus fourth
        # file would have (checking test_split.* here could never fail)
        assert not os.path.exists("test_parents.f00003.root"), "There should not be a fourth file"

        # check metadata
        meta = [get_metadata(e) for e in ["test_parents.f00000.root", "test_parents.f00001.root", "test_parents.f00002.root"]]
        assert meta[0]["parents"] == ["parent0"], "parents wrong"
        assert meta[1]["parents"] == ["parent0", "parent1"], "parents wrong"
        assert meta[2]["parents"] == ["parent1"], "parents wrong"

        # the event ranges of consecutive files have to follow each other
        # without gaps, starting at event 1
        last = 0
        for m in meta:
            assert m["eventLow"] == last + 1, "eventLow is not correct"
            last = m["eventHigh"]
            assert m["eventLow"] + m["nEvents"] - 1 == m["eventHigh"], "event high is inconsistent"
            assert m["mcEvents"] == 0, "MC events cannot be saved"

        assert sum(e["nEvents"] for e in meta) == 550, "number of events must be correct"

    # check what happens with other extensions
    check_filenames = {
        # no extension: just add one
        "test_noext": "test_noext.f00000.root",
        # any other extension: replace
        "test_otherext.foo": "test_otherext.f00000.root",
        # but keep parameters or anchors in urls untouched. TFile::Open ignores
        # them for file:// urls but they are printed on the command line
        "file://test_param?foo=bar": "test_param.f00000.root",
        "file://test_anchor#foo": "test_anchor.f00000.root",
    }
    basf2.logging.log_level = basf2.LogLevel.INFO
    Belle2.MetadataService.Instance().setJsonFileName("")
    for name, result in check_filenames.items():
        # fresh empty directory per case so the directory listing contains
        # nothing but the file written by this job
        with clean_working_directory():
            path = basf2.Path()
            path.add_module("EventInfoSetter")
            path.add_module(CreateDummyData(10, True))  # 10 byte persistent dummy data
            path.add_module("RootOutput", outputFileName=name, outputSplitSize=1, updateFileCatalog=False)
            safe_process(path)
            assert os.listdir() == [result], "wrong output file name"
116 
117 # @endcond
Belle2::TestChunkData
Storable object which can be filled with random chunk data of a certain size.
Definition: TestChunkData.h:32
Belle2::PyStoreObj
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:69
root_input.BrokenEventsModule.__init__
def __init__(self, accepted)
Definition: root_input.py:138
root_input.BrokenEventsModule.event
def event(self)
Definition: root_input.py:150
Belle2::MetadataService::Instance
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
Definition: MetadataService.cc:28