Belle II Software development
MVATeacherAndAnalyser Class Reference

Public Member Functions

 __init__ (self, run_class, use_jupyter=True)
 
 train (self)
 
 evaluate_tracking (self)
 
 evaluate_classification (self)
 

Public Attributes

 run_class = run_class
 cached copy of the run class
 
 use_jupyter = use_jupyter
 cached flag to use jupyter notebook
 
str recording_file_name = self.run_class.recording_module + ".root"
 cached name of the output file
 
 file_name_path
 cached path without extension of the output file
 
str training_file_name = self.file_name_path + "Training" + ext
 cached path with extension of the training-output file
 
str test_file_name = self.file_name_path + "Testing" + ext
 cached path with extension of the testing-output file
 
str identifier_name = "FastBDT.weights.xml"
 cached identifier
 
str evaluation_file_name = self.identifier_name + ".pdf"
 cached name of the output PDF file
 
str expert_file_name = self.file_name_path + "TestingExport" + ext
 cached path with extension of the testing-export file
 
 weight_data_location
 cached path of the weight input data
 
 recording_module
 name of the module whose filter is adjusted (attribute supplied by the run class)
 
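 recording_parameter
 name of the module parameter that selects the filter mode, e.g. "recording", "mva" or "truth" (attribute supplied by the run class)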
 

Protected Member Functions

 _call_training_routine (self)
 
 _write_train_and_test_files (self)
 
 _create_records_file (self)
 
 _call_expert_routine (self)
 
 _call_evaluation_routine (self)
 

Detailed Description

Class for training and analysing a tracking module which has an MVA filter in it.

Works best if you are in a Jupyter notebook.

You need to supply a run_class which includes all settings needed to train and execute the module. This class will be mixed in with the normal trackfindingcdc run classes, so you can add settings (e.g. tracking_coverage etc.) as normal.

One example is:

    class TestClass:
        # This module will be trained
        recording_module = "FilterBasedVXDCDCTrackMerger"
        # This is the name of the parameter of this module, which will be set to "mva" etc.
        recording_parameter = "filter"

        # These mva cuts will be tested during evaluation.
        evaluation_cuts = [0.1, 0.2, ...]

        tracking_coverage = {
            'UsePXDHits': True,
            'UseSVDHits': True,
            'UseCDCHits': True,
        }

        # Some options, which will control the run classes
        fit_tracks = False
        generator_module = "EvtGenInput"

        # This will be added to the "normal" path, to record the training data (you do not have to set the module to
        # recording, as this is done automatically).
        def add_recording_modules(self, path):
            mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                            RecoTracksStoreArrayName='MCRecoTracks',
                                            WhichParticles=[])

            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)

            # Merge CDC and VXD tracks
            path.add_module('FilterBasedVXDCDCTrackMerger',
                            extrapolate=False,
                            CDCRecoTrackStoreArrayName="CDCRecoTracks",
                            VXDRecoTrackStoreArrayName="VXDRecoTracks",
                            MergedRecoTrackStoreArrayName="RecoTracks")

            return path

        # This will be added to the "normal" path, to evaluate the mva cuts. In most cases, this is the same as the
        # add_recording_modules (as the module parameters will be set automatically), but maybe you need
        # more here...
        def add_validation_modules(self, path):
            mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                            RecoTracksStoreArrayName='MCRecoTracks',
                                            WhichParticles=[])

            # Merge CDC and VXD tracks
            path.add_module('FilterBasedVXDCDCTrackMerger',
                            extrapolate=True,
                            CDCRecoTrackStoreArrayName="CDCRecoTracks",
                            VXDRecoTrackStoreArrayName="VXDRecoTracks",
                            MergedRecoTrackStoreArrayName="PrefitRecoTracks")

            path.add_module("SetupGenfitExtrapolation")

            path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")

            path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")

            path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
                            outputRecoTracksStoreArrayName="RecoTracks")

            # We need to include the matching ourselves, as we have already a matching algorithm in place
            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)

            return path

Definition at line 49 of file analyse.py.

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self,
run_class,
use_jupyter = True )
Constructor

Definition at line 132 of file analyse.py.

132 def __init__(self, run_class, use_jupyter=True):
133 """Constructor"""
134
135 ## cached copy of the run class
136 self.run_class = run_class
137 ## cached flag to use jupyter notebook
138 self.use_jupyter = use_jupyter
139
140 ## cached name of the output file
141 self.recording_file_name = self.run_class.recording_module + ".root"
142
143 ## cached path without extension of the output file
144 self.file_name_path, ext = os.path.splitext(self.recording_file_name)
145
146 ## cached path with extension of the training-output file
147 self.training_file_name = self.file_name_path + "Training" + ext
148 ## cached path with extension of the testing-output file
149 self.test_file_name = self.file_name_path + "Testing" + ext
150
151 ## cached identifier
152 self.identifier_name = "FastBDT.weights.xml"
153 ## cached name of the output PDF file
154 self.evaluation_file_name = self.identifier_name + ".pdf"
155
156 ## cached path with extension of the testing-export file
157 self.expert_file_name = self.file_name_path + "TestingExport" + ext
158
159 ## cached path of the weight input data
160 self.weight_data_location = Belle2.FileSystem.findFile(os.path.join("tracking/data",
161 self.run_class.weight_data_location))
162

Member Function Documentation

◆ _call_evaluation_routine()

_call_evaluation_routine ( self)
protected
Call the mva evaluation routine

Definition at line 301 of file analyse.py.

301 def _call_evaluation_routine(self):
302 """Call the mva evaluation routine"""
303 try:
304 check_output(["basf2_mva_evaluate.py",
305 "--identifiers", self.identifier_name, self.weight_data_location,
306 "--train_datafiles", self.training_file_name,
307 "--datafiles", self.test_file_name,
308 "--treename", "records",
309 "--outputfile", self.evaluation_file_name],
310 stderr=STDOUT)
311 except CalledProcessError as e:
312 raise RuntimeError(e.output)

◆ _call_expert_routine()

_call_expert_routine ( self)
protected
Call the mva expert

Definition at line 290 of file analyse.py.

290 def _call_expert_routine(self):
291 """Call the mva expert"""
292 try:
293 check_output(["basf2_mva_expert",
294 "--identifiers", self.identifier_name, self.weight_data_location,
295 "--datafiles", self.test_file_name,
296 "--outputfile", self.expert_file_name,
297 "--treename", "records"])
298 except CalledProcessError as e:
299 raise RuntimeError(e.output)
300

◆ _call_training_routine()

_call_training_routine ( self)
protected
Call the mva training routine in the train file

Definition at line 238 of file analyse.py.

238 def _call_training_routine(self):
239 """Call the mva training routine in the train file"""
240 try:
241 check_output(["trackfindingcdc_teacher", self.training_file_name])
242 except CalledProcessError as e:
243 raise RuntimeError(e.output)
244

◆ _create_records_file()

_create_records_file ( self)
protected
Create a path using the settings of the run_class and process it. This will create a ROOT file with the recorded data.

Definition at line 258 of file analyse.py.

258 def _create_records_file(self):
259 """
260 Create a path using the settings of the run_class and process it.
261 This will create a ROOT file with the recorded data.
262 """
263 recording_file_name = self.recording_file_name
264
265 class RecordRun(self.run_class, ReadOrGenerateEventsRun):
266
267 def create_path(self):
268 path = ReadOrGenerateEventsRun.create_path(self)
269
270 self.add_recording_modules(path)
271
272 adjust_module(path, self.recording_module,
273 **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
274 self.recording_parameter: "recording"})
275
276 return path
277
278 run = RecordRun()
279 path = run.create_path()
280
281 if self.use_jupyter:
282 calculation = handler.process(path)
283 calculation.start()
284 calculation.wait_for_end()
285
286 return calculation
287 else:
288 run.execute()
289

◆ _write_train_and_test_files()

_write_train_and_test_files ( self)
protected
Randomly split the recorded file into a training sample and a test sample of roughly equal size, and write each sample to its own file.

Definition at line 245 of file analyse.py.

245 def _write_train_and_test_files(self):
246 """Split the recorded file into two halves: training and test file and write it back"""
247 # TODO: This seems to reorder the columns...
248 df = uproot.concatenate(self.recording_file_name, library='pd')
249 mask = np.random.rand(len(df)) < 0.5
250 training_sample = df[mask]
251 test_sample = df[~mask]
252
253 with uproot.recreate(self.training_file_name) as outfile:
254 outfile["records"] = training_sample
255 with uproot.recreate(self.test_file_name) as outfile:
256 outfile["records"] = test_sample
257

◆ evaluate_classification()

evaluate_classification ( self)
Evaluate the classification power on the test data set and produce a PDF.

Definition at line 215 of file analyse.py.

215 def evaluate_classification(self):
216 """
217 Evaluate the classification power on the test data set and produce a PDF.
218 """
219 if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
220 self._call_evaluation_routine()
221 self._call_expert_routine()
222
223 df = uproot.concatenate(
224 self.expert_file_name,
225 library='pd').merge(
226 uproot.concatenate(
227 self.test_file_name,
228 library='pd'),
229 left_index=True,
230 right_index=True)
231
232 if self.use_jupyter:
233 from IPython.display import display
234 display(PDF(self.evaluation_file_name, size=(800, 800)))
235
236 return df
237

◆ evaluate_tracking()

evaluate_tracking ( self)
Use the trained weight file and run the path again with different mva cuts. The validation is done using the normal tracking validation modules.

Definition at line 173 of file analyse.py.

173 def evaluate_tracking(self):
174 """
175 Use the trained weight file and call the path again using different mva cuts. Validation using the
176 normal tracking validation modules.
177 """
178 copy(self.identifier_name, self.weight_data_location)
179
180 try:
181 os.mkdir("results")
182 except FileExistsError:
183 pass
184
185 def create_path(mva_cut):
186 class ValidationRun(self.run_class, TrackingValidationRun):
187
188 def finder_module(self, path):
189 self.add_validation_modules(path)
190
191 if mva_cut != 999:
192 adjust_module(path, self.recording_module,
193 **{self.recording_parameter + "Parameters": {"cut": mva_cut},
194 self.recording_parameter: "mva"})
195 else:
196 adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})
197
198 output_file_name = f"results/validation_{mva_cut}.root"
199
200 run = ValidationRun()
201
202 if not os.path.exists(run.output_file_name):
203 return {"path": run.create_path()}
204 else:
205 return {"path": None}
206
207 assert self.use_jupyter
208
209 calculations = handler.process_parameter_space(create_path, mva_cut=self.run_class.evaluation_cuts + [999])
210 calculations.start()
211 calculations.wait_for_end()
212
213 return calculations
214

◆ train()

train ( self)
Record a training file, split it in two parts and call the training method of the mva package

Definition at line 163 of file analyse.py.

163 def train(self):
164 """Record a training file, split it in two parts and call the training method of the mva package"""
165 if not os.path.exists(self.recording_file_name):
166 self._create_records_file()
167
168 if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
169 self._write_train_and_test_files()
170
171 self._call_training_routine()
172
Definition train.py:1

Member Data Documentation

◆ evaluation_file_name

evaluation_file_name = self.identifier_name + ".pdf"

cached name of the output PDF file

Definition at line 154 of file analyse.py.

◆ expert_file_name

expert_file_name = self.file_name_path + "TestingExport" + ext

cached path with extension of the testing-export file

Definition at line 157 of file analyse.py.

◆ file_name_path

file_name_path

cached path without extension of the output file

Definition at line 144 of file analyse.py.

◆ identifier_name

identifier_name = "FastBDT.weights.xml"

cached identifier

Definition at line 152 of file analyse.py.

◆ recording_file_name

recording_file_name = self.run_class.recording_module + ".root"

cached name of the output file

Definition at line 141 of file analyse.py.

◆ recording_module

recording_module

name of the module whose filter is adjusted (attribute supplied by the run class)

Definition at line 192 of file analyse.py.

◆ recording_parameter

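recording_parameter

name of the module parameter that selects the filter mode, e.g. "recording", "mva" or "truth" (attribute supplied by the run class)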

Definition at line 194 of file analyse.py.

◆ run_class

run_class = run_class

cached copy of the run class

Definition at line 136 of file analyse.py.

◆ test_file_name

test_file_name = self.file_name_path + "Testing" + ext

cached path with extension of the testing-output file

Definition at line 149 of file analyse.py.

◆ training_file_name

training_file_name = self.file_name_path + "Training" + ext

cached path with extension of the training-output file

Definition at line 147 of file analyse.py.

◆ use_jupyter

use_jupyter = use_jupyter

cached flag to use jupyter notebook

Definition at line 138 of file analyse.py.

◆ weight_data_location

weight_data_location
Initial value:
= Belle2.FileSystem.findFile(os.path.join("tracking/data",
self.run_class.weight_data_location))

References Belle2::FileSystem::findFile():
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if found.

cached path of the weight input data

Definition at line 160 of file analyse.py.


The documentation for this class was generated from the following file: