Belle II Software  release-05-01-25
analyse.py
1 from ipython_tools import handler
2 from root_pandas import read_root, to_root
3 import numpy as np
4 import os.path
5 from subprocess import check_output, CalledProcessError, STDOUT
6 
7 from shutil import copy
8 
9 from tracking.adjustments import adjust_module
10 from tracking.run.event_generation import ReadOrGenerateEventsRun
11 from tracking.validation.run import TrackingValidationRun
12 
13 from ROOT import Belle2
14 
15 
class PDF:
    """
    Helper class to show a PDF file in a jupyter notebook.

    The object carries no logic besides the two rich-display hooks
    (``_repr_html_`` and ``_repr_latex_``) that return the markup for the file.
    """

    def __init__(self, pdf, size=(600, 700)):
        """
        Show a PDF file.
        :param pdf: The filename of the PDF file.
        :param size: The size to use, indexable as (width, height).
        """
        #: cached copy of the pdf filename
        self.pdf = pdf
        #: cached copy of the size
        self.size = size

    def _repr_html_(self):
        """HTML representation: an iframe whose source is the PDF file."""
        # Local import: only this method needs it, and the module import block stays untouched.
        from html import escape

        # Quote the attribute values and escape the filename, so that names containing
        # spaces or quotes do not break the generated tag.
        return '<iframe src="{0}" width="{1}" height="{2}"></iframe>'.format(
            escape(str(self.pdf), quote=True), self.size[0], self.size[1])

    def _repr_latex_(self):
        """LaTeX representation: an includegraphics command for the PDF file."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)
39 
40 
42  """
43  Class for training and analysing a tracking module, which has a MVA filter in it.
44 
45  Works best, if you are on a jupyter notebook.
46 
47  You need to supply a run_class, which includes all needed settings, on how to
48  train and execute the module. This class will be mixed in with the normal trackfindingcdc
49  run classes, so you can add the setting (e.g. tracking_coverage etc.) as normal.
50 
51  One example is:
52 
53  class TestClass:
54  # This module will be trained
55  recording_module = "FilterBasedVXDCDCTrackMerger"
56  # This is the name of the parameter of this module, which will be set to "mva" etc.
57  recording_parameter = "filter"
58 
59  # These mva cuts will be tested during evaluation.
60  evaluation_cuts = [0.1, 0.2, ...]
61 
62  tracking_coverage = {
63  'UsePXDHits': True,
64  'UseSVDHits': True,
65  'UseCDCHits': True,
66  }
67 
68  # Some options, which will control the run classes
69  fit_tracks = False
70  generator_module = "EvtGenInput"
71 
72  # This will be added to the "normal" path, to record the training data (you do not have to set the module to
73  # recording, as this is done automatically).
74  def add_recording_modules(self, path):
75  mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
76  RecoTracksStoreArrayName='MCRecoTracks',
77  WhichParticles=[])
78 
79  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
80  prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
81  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
82  prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)
83 
84  # Merge CDC and VXD tracks
85  path.add_module('FilterBasedVXDCDCTrackMerger',
86  extrapolate=False,
87  CDCRecoTrackStoreArrayName="CDCRecoTracks",
88  VXDRecoTrackStoreArrayName="VXDRecoTracks",
89  MergedRecoTrackStoreArrayName="RecoTracks")
90 
91  return path
92 
93  # This will be added to the "normal" path, to evaluate the mva cuts. In most cases, this is the same as the
94  # add_recording_modules (as the module parameters will be set automatically), but maybe you need
95  # more here...
96  def add_validation_modules(self, path):
97  mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
98  RecoTracksStoreArrayName='MCRecoTracks',
99  WhichParticles=[])
100 
101  # Merge CDC and VXD tracks
102  path.add_module('FilterBasedVXDCDCTrackMerger',
103  extrapolate=True,
104  CDCRecoTrackStoreArrayName="CDCRecoTracks",
105  VXDRecoTrackStoreArrayName="VXDRecoTracks",
106  MergedRecoTrackStoreArrayName="PrefitRecoTracks")
107 
108  path.add_module("SetupGenfitExtrapolation")
109 
110  path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")
111 
112  path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")
113 
114  path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
115  outputRecoTracksStoreArrayName="RecoTracks")
116 
117  # We need to include the matching ourselves, as we have already a matching algorithm in place
118  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
119  prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)
120 
121  return path
122  """
123 
124  def __init__(self, run_class, use_jupyter=True):
125  """Constructor"""
126 
127 
128  self.run_class = run_class
129 
130  self.use_jupyter = use_jupyter
131 
132 
133  self.recording_file_name = self.run_class.recording_module + ".root"
134 
135 
136  self.file_name_path, ext = os.path.splitext(self.recording_file_name)
137 
138 
139  self.training_file_name = self.file_name_path + "Training" + ext
140 
141  self.test_file_name = self.file_name_path + "Testing" + ext
142 
143 
144  self.identifier_name = "FastBDT.weights.xml"
145 
147 
148 
149  self.expert_file_name = self.file_name_path + "TestingExport" + ext
150 
151 
152  self.weight_data_location = Belle2.FileSystem.findFile(os.path.join("tracking/data",
153  self.run_class.weight_data_location))
154 
155  def train(self):
156  """Record a training file, split it in two parts and call the training method of the mva package"""
157  if not os.path.exists(self.recording_file_name):
158  self._create_records_file()
159 
160  if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
162 
164 
def evaluate_tracking(self):
    """
    Use the trained weight file and call the path again using different mva cuts. Validation using the
    normal tracking validation modules.

    The weight file ``self.identifier_name`` is copied to ``self.weight_data_location`` and the
    ``results`` directory is created if needed. One validation run is then set up for every cut in
    ``run_class.evaluation_cuts`` plus one extra run with the filter set to "truth". A run whose
    output file ``results/validation_<cut>.root`` already exists is handed over with ``path=None``.

    :return: The object returned by ``handler.process_parameter_space``, after ``start()`` and
        ``wait_for_end()`` have been called on it.
    """
    # Marker in the list of cuts that selects the "truth" filter instead of an mva cut.
    truth_marker = 999

    copy(self.identifier_name, self.weight_data_location)

    # An already existing directory is fine. Unlike a swallowed FileExistsError from os.mkdir,
    # this still raises if "results" exists but is not a directory.
    os.makedirs("results", exist_ok=True)

    def create_path(mva_cut):
        class ValidationRun(self.run_class, TrackingValidationRun):

            def finder_module(self, path):
                # "self" is the ValidationRun instance here, not the analyser.
                self.add_validation_modules(path)

                if mva_cut != truth_marker:
                    adjust_module(path, self.recording_module,
                                  **{self.recording_parameter + "Parameters": {"cut": mva_cut},
                                     self.recording_parameter: "mva"})
                else:
                    adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})

            output_file_name = "results/validation_{mva_cut}.root".format(mva_cut=mva_cut)

        run = ValidationRun()

        # Only build a path for cuts without an existing result file.
        if not os.path.exists(run.output_file_name):
            return {"path": run.create_path()}
        else:
            return {"path": None}

    assert self.use_jupyter, "evaluate_tracking needs use_jupyter=True, as it processes the paths with the jupyter handler"

    calculations = handler.process_parameter_space(create_path,
                                                   mva_cut=self.run_class.evaluation_cuts + [truth_marker])
    calculations.start()
    calculations.wait_for_end()

    return calculations
206 
208  """
209  Evaluate the classification power on the test data set and produce a PDF.
210  """
211  if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
213  self._call_expert_routine()
214 
215  df = read_root(self.expert_file_name).merge(read_root(self.test_file_name), left_index=True, right_index=True)
216 
217  if self.use_jupyter:
218  from IPython.display import display
219  display(PDF(self.evaluation_file_name, size=(800, 800)))
220 
221  return df
222 
def _call_training_routine(self):
    """
    Call the mva training routine in the train file.

    Runs the ``trackfindingcdc_teacher`` command with ``self.training_file_name`` as its argument.

    :raises RuntimeError: If the command exits with a non-zero status. The exception carries the
        captured output of the command.
    """
    try:
        # Capture stderr together with stdout (as _call_evaluation_routine does), so that the
        # error raised below contains the failure message of the command.
        check_output(["trackfindingcdc_teacher", self.training_file_name], stderr=STDOUT)
    except CalledProcessError as e:
        raise RuntimeError(e.output) from e
229 
def _write_train_and_test_files(self):
    """
    Split the recorded file randomly into a training and a test sample and write both back.

    Every row of ``self.recording_file_name`` is put into the training sample if a uniform random
    number drawn for it is below 0.5, and into the test sample otherwise. The samples are written
    to ``self.training_file_name`` and ``self.test_file_name`` under the tree key "records".
    """
    # TODO: This seems to reorder the columns...
    records = read_root(self.recording_file_name)
    goes_to_training = np.random.rand(len(records)) < 0.5

    samples = (
        (records[goes_to_training], self.training_file_name),
        (records[~goes_to_training], self.test_file_name),
    )
    for sample, file_name in samples:
        to_root(sample, file_name, tree_key="records")
240 
def _create_records_file(self):
    """
    Create a path using the settings of the run_class and process it.
    This will create a ROOT file with the recorded data.

    :return: The calculation returned by ``handler.process`` when ``use_jupyter`` is set,
        otherwise None (the run is executed directly).
    """
    # Bound to a local name, because "self" inside RecordRun.create_path is the run instance.
    output_root_file = self.recording_file_name

    class RecordRun(self.run_class, ReadOrGenerateEventsRun):

        def create_path(self):
            path = ReadOrGenerateEventsRun.create_path(self)
            self.add_recording_modules(path)

            # Switch the module to recording mode and tell it where to write.
            module_settings = {
                self.recording_parameter + "Parameters": {"rootFileName": output_root_file},
                self.recording_parameter: "recording",
            }
            adjust_module(path, self.recording_module, **module_settings)

            return path

    run = RecordRun()
    # The path is built in both modes, as in the original control flow.
    path = run.create_path()

    if not self.use_jupyter:
        run.execute()
        return None

    calculation = handler.process(path)
    calculation.start()
    calculation.wait_for_end()

    return calculation
272 
def _call_expert_routine(self):
    """
    Call the mva expert.

    Runs the ``basf2_mva_expert`` command with ``self.identifier_name`` and
    ``self.weight_data_location`` as identifiers, ``self.test_file_name`` as data file (tree
    "records") and ``self.expert_file_name`` as output file.

    :raises RuntimeError: If the command exits with a non-zero status. The exception carries the
        captured output of the command.
    """
    command = ["basf2_mva_expert",
               "--identifiers", self.identifier_name, self.weight_data_location,
               "--datafiles", self.test_file_name,
               "--outputfile", self.expert_file_name,
               "--treename", "records"]
    try:
        # Capture stderr together with stdout (as _call_evaluation_routine does), so that the
        # error raised below contains the failure message of the command.
        check_output(command, stderr=STDOUT)
    except CalledProcessError as e:
        raise RuntimeError(e.output) from e
283 
def _call_evaluation_routine(self):
    """
    Call the mva evaluation routine.

    Runs the ``basf2_mva_evaluate.py`` command on the training and test files (tree "records")
    with ``self.evaluation_file_name`` as output file. stderr is captured together with stdout.

    :raises RuntimeError: If the command exits with a non-zero status. The exception carries the
        captured output of the command.
    """
    command = [
        "basf2_mva_evaluate.py",
        "--identifiers", self.identifier_name, self.weight_data_location,
        "--train_datafiles", self.training_file_name,
        "--datafiles", self.test_file_name,
        "--treename", "records",
        "--outputfile", self.evaluation_file_name,
    ]
    try:
        check_output(command, stderr=STDOUT)
    except CalledProcessError as failure:
        raise RuntimeError(failure.output)
analyse.MVATeacherAndAnalyser._write_train_and_test_files
def _write_train_and_test_files(self)
Definition: analyse.py:230
analyse.MVATeacherAndAnalyser.expert_file_name
expert_file_name
cached path with extension of the testing-export file
Definition: analyse.py:149
analyse.MVATeacherAndAnalyser
Definition: analyse.py:41
analyse.PDF
Definition: analyse.py:16
analyse.MVATeacherAndAnalyser._call_evaluation_routine
def _call_evaluation_routine(self)
Definition: analyse.py:284
analyse.PDF.size
size
cached copy of the size
Definition: analyse.py:30
tracking.adjustments
Definition: adjustments.py:1
analyse.MVATeacherAndAnalyser.weight_data_location
weight_data_location
cached path of the weight input data
Definition: analyse.py:152
tracking.validation.run
Definition: run.py:1
analyse.MVATeacherAndAnalyser.__init__
def __init__(self, run_class, use_jupyter=True)
Definition: analyse.py:124
analyse.PDF.__init__
def __init__(self, pdf, size=(600, 700))
Definition: analyse.py:21
Belle2::merge
std::vector< std::vector< double > > merge(std::vector< std::vector< std::vector< double >>> toMerge)
merge { vector<double> a, vector<double> b} into {a, b}
Definition: tools.h:44
tracking.run.event_generation.ReadOrGenerateEventsRun
Definition: event_generation.py:26
analyse.MVATeacherAndAnalyser.evaluate_tracking
def evaluate_tracking(self)
Definition: analyse.py:165
analyse.MVATeacherAndAnalyser.test_file_name
test_file_name
cached path with extension of the testing-output file
Definition: analyse.py:141
analyse.MVATeacherAndAnalyser.train
def train(self)
Definition: analyse.py:155
analyse.MVATeacherAndAnalyser.run_class
run_class
cached copy of the run class
Definition: analyse.py:128
analyse.MVATeacherAndAnalyser.evaluate_classification
def evaluate_classification(self)
Definition: analyse.py:207
display
Definition: display.py:1
analyse.MVATeacherAndAnalyser.use_jupyter
use_jupyter
cached flag to use jupyter notebook
Definition: analyse.py:130
tracking.run.event_generation
Definition: event_generation.py:1
analyse.MVATeacherAndAnalyser.identifier_name
identifier_name
cached identifier
Definition: analyse.py:144
analyse.MVATeacherAndAnalyser.training_file_name
training_file_name
cached path with extension of the training-output file
Definition: analyse.py:139
analyse.MVATeacherAndAnalyser._call_expert_routine
def _call_expert_routine(self)
Definition: analyse.py:273
analyse.MVATeacherAndAnalyser.recording_file_name
recording_file_name
cached name of the output file
Definition: analyse.py:133
Belle2::FileSystem::findFile
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:147
analyse.PDF._repr_latex_
def _repr_latex_(self)
Definition: analyse.py:36
analyse.MVATeacherAndAnalyser.evaluation_file_name
evaluation_file_name
cached name of the output PDF file
Definition: analyse.py:146
analyse.PDF._repr_html_
def _repr_html_(self)
Definition: analyse.py:32
analyse.MVATeacherAndAnalyser._create_records_file
def _create_records_file(self)
Definition: analyse.py:241
tracking.validation.run.TrackingValidationRun
Definition: run.py:18
analyse.MVATeacherAndAnalyser._call_training_routine
def _call_training_routine(self)
Definition: analyse.py:223
analyse.PDF.pdf
pdf
cached copy of the pdf filename
Definition: analyse.py:28