Belle II Software development
analyse.py
1
8
9from ipython_tools import handler
10import uproot
11import numpy as np
12import os.path
13from subprocess import check_output, CalledProcessError, STDOUT
14
15from shutil import copy
16
17from tracking.adjustments import adjust_module
18from tracking.run.event_generation import ReadOrGenerateEventsRun
19from tracking.validation.run import TrackingValidationRun
20
21from ROOT import Belle2
22
23
class PDF:
    """
    Helper class to show a PDF file in a jupyter notebook.

    The object only stores a file name and a size and exposes them through
    the ``_repr_html_`` and ``_repr_latex_`` methods.
    """

    def __init__(self, pdf, size=(600, 700)):
        """
        Show a PDF file.
        :param pdf: The filename of the PDF file.
        :param size: The size to use; element 0 is used as the width and
                     element 1 as the height of the HTML iframe.
        """
        #: cached copy of the pdf filename
        self.pdf = pdf
        #: cached copy of the size
        self.size = size

    def _repr_html_(self):
        """HTML representation: an iframe whose source is the PDF file."""
        # Imported locally so the block does not depend on a file-level import.
        from html import escape

        # The file name is quoted and escaped: an unquoted attribute value is
        # cut at the first space and a quote character would break the tag.
        source = escape(str(self.pdf), quote=True)
        width, height = self.size[0], self.size[1]
        return f'<iframe src="{source}" width={width} height={height}></iframe>'

    def _repr_latex_(self):
        """LaTeX representation: an includegraphics command for the PDF file."""
        # Doubled braces are literal braces; the innermost pair interpolates.
        return rf'\includegraphics[width=1.0\textwidth]{{{self.pdf}}}'
47
48
50 """
51 Class for training and analysing a tracking module, which has a MVA filter in it.
52
53 Works best if you are on a jupyter notebook.
54
55 You need to supply a run_class, which includes all needed settings, on how to
56 train and execute the module. This class will be mixed in with the normal trackfindingcdc
57 run classes, so you can add the setting (e.g. tracking_coverage etc.) as normal.
58
59 One example is:
60
61 class TestClass:
62 # This module will be trained
63 recording_module = "FilterBasedVXDCDCTrackMerger"
64 # This is the name of the parameter of this module, which will be set to "mva" etc.
65 recording_parameter = "filter"
66
67 # These mva cuts will be tested during evaluation.
68 evaluation_cuts = [0.1, 0.2, ...]
69
70 tracking_coverage = {
71 'UsePXDHits': True,
72 'UseSVDHits': True,
73 'UseCDCHits': True,
74 }
75
76 # Some options, which will control the run classes
77 fit_tracks = False
78 generator_module = "EvtGenInput"
79
80 # This will be added to the "normal" path, to record the training data (you do not have to set the module to
81 # recording, as this is done automatically).
82 def add_recording_modules(self, path):
83 mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
84 RecoTracksStoreArrayName='MCRecoTracks',
85 WhichParticles=[])
86
87 path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
88 prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
89 path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
90 prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)
91
92 # Merge CDC and VXD tracks
93 path.add_module('FilterBasedVXDCDCTrackMerger',
94 extrapolate=False,
95 CDCRecoTrackStoreArrayName="CDCRecoTracks",
96 VXDRecoTrackStoreArrayName="VXDRecoTracks",
97 MergedRecoTrackStoreArrayName="RecoTracks")
98
99 return path
100
101 # This will be added to the "normal" path, to evaluate the mva cuts. In most cases, this is the same as the
102 # add_recording_modules (as the module parameters will be set automatically), but maybe you need
103 # more here...
104 def add_validation_modules(self, path):
105 mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
106 RecoTracksStoreArrayName='MCRecoTracks',
107 WhichParticles=[])
108
109 # Merge CDC and VXD tracks
110 path.add_module('FilterBasedVXDCDCTrackMerger',
111 extrapolate=True,
112 CDCRecoTrackStoreArrayName="CDCRecoTracks",
113 VXDRecoTrackStoreArrayName="VXDRecoTracks",
114 MergedRecoTrackStoreArrayName="PrefitRecoTracks")
115
116 path.add_module("SetupGenfitExtrapolation")
117
118 path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")
119
120 path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")
121
122 path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
123 outputRecoTracksStoreArrayName="RecoTracks")
124
125 # We need to include the matching ourselves, as we have already a matching algorithm in place
126 path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
127 prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)
128
129 return path
130 """
131
132 def __init__(self, run_class, use_jupyter=True):
133 """Constructor"""
134
135
136 self.run_class = run_class
137
138 self.use_jupyter = use_jupyter
139
140
141 self.recording_file_name = self.run_class.recording_module + ".root"
142
143
144 self.file_name_path, ext = os.path.splitext(self.recording_file_name)
145
146
147 self.training_file_name = self.file_name_path + "Training" + ext
148
149 self.test_file_name = self.file_name_path + "Testing" + ext
150
151
152 self.identifier_name = "FastBDT.weights.xml"
153
155
156
157 self.expert_file_name = self.file_name_path + "TestingExport" + ext
158
159
160 self.weight_data_location = Belle2.FileSystem.findFile(os.path.join("tracking/data",
161 self.run_class.weight_data_location))
162
163 def train(self):
164 """Record a training file, split it in two parts and call the training method of the mva package"""
165 if not os.path.exists(self.recording_file_name):
167
168 if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
170
172
174 """
175 Use the trained weight file and call the path again using different mva cuts. Validation using the
176 normal tracking validation modules.
177 """
179
180 try:
181 os.mkdir("results")
182 except FileExistsError:
183 pass
184
185 def create_path(mva_cut):
186 class ValidationRun(self.run_class, TrackingValidationRun):
187
188 def finder_module(self, path):
189 self.add_validation_modules(path)
190
191 if mva_cut != 999:
192 adjust_module(path, self.recording_module,
193 **{self.recording_parameter + "Parameters": {"cut": mva_cut},
194 self.recording_parameter: "mva"})
195 else:
196 adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})
197
198 output_file_name = f"results/validation_{mva_cut}.root"
199
200 run = ValidationRun()
201
202 if not os.path.exists(run.output_file_name):
203 return {"path": run.create_path()}
204 else:
205 return {"path": None}
206
207 assert self.use_jupyter
208
209 calculations = handler.process_parameter_space(create_path, mva_cut=self.run_class.evaluation_cuts + [999])
210 calculations.start()
211 calculations.wait_for_end()
212
213 return calculations
214
216 """
217 Evaluate the classification power on the test data set and produce a PDF.
218 """
219 if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
222
223 df = uproot.concatenate(
224 self.expert_file_name,
225 library='pd').merge(
226 uproot.concatenate(
227 self.test_file_name,
228 library='pd'),
229 left_index=True,
230 right_index=True)
231
232 if self.use_jupyter:
233 from IPython.display import display
234 display(PDF(self.evaluation_file_name, size=(800, 800)))
235
236 return df
237
239 """Call the mva training routine in the train file"""
240 try:
241 check_output(["trackfindingcdc_teacher", self.training_file_name])
242 except CalledProcessError as e:
243 raise RuntimeError(e.output)
244
246 """Split the recorded file into two halves: training and test file and write it back"""
247 # TODO: This seems to reorder the columns...
248 df = uproot.concatenate(self.recording_file_name, library='pd')
249 mask = np.random.rand(len(df)) < 0.5
250 training_sample = df[mask]
251 test_sample = df[~mask]
252
253 with uproot.recreate(self.training_file_name) as outfile:
254 outfile["records"] = training_sample
255 with uproot.recreate(self.test_file_name) as outfile:
256 outfile["records"] = test_sample
257
259 """
260 Create a path using the settings of the run_class and process it.
261 This will create a ROOT file with the recorded data.
262 """
263 recording_file_name = self.recording_file_name
264
265 class RecordRun(self.run_class, ReadOrGenerateEventsRun):
266
267 def create_path(self):
268 path = ReadOrGenerateEventsRun.create_path(self)
269
270 self.add_recording_modules(path)
271
272 adjust_module(path, self.recording_module,
273 **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
274 self.recording_parameter: "recording"})
275
276 return path
277
278 run = RecordRun()
279 path = run.create_path()
280
281 if self.use_jupyter:
282 calculation = handler.process(path)
283 calculation.start()
284 calculation.wait_for_end()
285
286 return calculation
287 else:
288 run.execute()
289
291 """Call the mva expert"""
292 try:
293 check_output(["basf2_mva_expert",
294 "--identifiers", self.identifier_name, self.weight_data_location,
295 "--datafiles", self.test_file_name,
296 "--outputfile", self.expert_file_name,
297 "--treename", "records"])
298 except CalledProcessError as e:
299 raise RuntimeError(e.output)
300
302 """Call the mva evaluation routine"""
303 try:
304 check_output(["basf2_mva_evaluate.py",
305 "--identifiers", self.identifier_name, self.weight_data_location,
306 "--train_datafiles", self.training_file_name,
307 "--datafiles", self.test_file_name,
308 "--treename", "records",
309 "--outputfile", self.evaluation_file_name],
310 stderr=STDOUT)
311 except CalledProcessError as e:
312 raise RuntimeError(e.output)
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151
run_class
cached copy of the run class
Definition: analyse.py:136
evaluation_file_name
cached name of the output PDF file
Definition: analyse.py:154
def _call_training_routine(self)
Definition: analyse.py:238
def evaluate_classification(self)
Definition: analyse.py:215
test_file_name
cached path with extension of the testing-output file
Definition: analyse.py:149
use_jupyter
cached flag to use jupyter notebook
Definition: analyse.py:138
recording_file_name
cached name of the output file
Definition: analyse.py:141
weight_data_location
cached path of the weight input data
Definition: analyse.py:160
expert_file_name
cached path with extension of the testing-export file
Definition: analyse.py:157
training_file_name
cached path with extension of the training-output file
Definition: analyse.py:147
def __init__(self, run_class, use_jupyter=True)
Definition: analyse.py:132
identifier_name
cached identifier
Definition: analyse.py:152
def _write_train_and_test_files(self)
Definition: analyse.py:245
def _call_evaluation_routine(self)
Definition: analyse.py:301
def _repr_html_(self)
Definition: analyse.py:40
pdf
cached copy of the pdf filename
Definition: analyse.py:36
size
cached copy of the size
Definition: analyse.py:38
def __init__(self, pdf, size=(600, 700))
Definition: analyse.py:29
def _repr_latex_(self)
Definition: analyse.py:44
Definition: train.py:1