Belle II Software release-08-01-10
analyse.py
from ipython_tools import handler
import uproot
import numpy as np
import os.path
from subprocess import check_output, CalledProcessError, STDOUT

from shutil import copy

from tracking.adjustments import adjust_module
from tracking.run.event_generation import ReadOrGenerateEventsRun
from tracking.validation.run import TrackingValidationRun

from ROOT import Belle2


class PDF:
    """
    Helper class to show a PDF file in a jupyter notebook.
    """

    def __init__(self, pdf, size=(600, 700)):
        """
        Show a PDF file.
        :param pdf: The filename of the PDF file.
        :param size: The size to use.
        """
        #: cached copy of the pdf filename
        self.pdf = pdf
        #: cached copy of the size
        self.size = size

    def _repr_html_(self):
        """HTML representation"""
        return '<iframe src={0} width={1[0]} height={1[1]}></iframe>'.format(self.pdf, self.size)

    def _repr_latex_(self):
        """LaTeX representation"""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)

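# A minimal usage sketch for the PDF helper above. It assumes a jupyter notebook
# session and an existing file "evaluation.pdf"; both the filename and the size
# are illustrative, not part of this module:
#
#     from IPython.display import display
#     display(PDF("evaluation.pdf", size=(800, 800)))
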
class MVATeacherAndAnalyser:  # NOTE: class name assumed; the definition line is not part of this listing
    """
    Class for training and analysing a tracking module which has an MVA filter in it.

    Works best if you are in a jupyter notebook.

    You need to supply a run_class, which includes all needed settings on how to
    train and execute the module. This class will be mixed in with the normal trackfindingcdc
    run classes, so you can add the settings (e.g. tracking_coverage etc.) as normal.

    One example is:

        class TestClass:
            # This module will be trained
            recording_module = "FilterBasedVXDCDCTrackMerger"
            # This is the name of the parameter of this module, which will be set to "mva" etc.
            recording_parameter = "filter"

            # These mva cuts will be tested during evaluation.
            evaluation_cuts = [0.1, 0.2, ...]

            tracking_coverage = {
                'UsePXDHits': True,
                'UseSVDHits': True,
                'UseCDCHits': True,
            }

            # Some options which control the run classes
            fit_tracks = False
            generator_module = "EvtGenInput"

            # This will be added to the "normal" path to record the training data (you do not have to set the
            # module to recording, as this is done automatically).
            def add_recording_modules(self, path):
                mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                                RecoTracksStoreArrayName='MCRecoTracks',
                                                WhichParticles=[])

                path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                                prRecoTracksStoreArrayName="CDCRecoTracks",
                                UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
                path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                                prRecoTracksStoreArrayName="VXDRecoTracks",
                                UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)

                # Merge CDC and VXD tracks
                path.add_module('FilterBasedVXDCDCTrackMerger',
                                extrapolate=False,
                                CDCRecoTrackStoreArrayName="CDCRecoTracks",
                                VXDRecoTrackStoreArrayName="VXDRecoTracks",
                                MergedRecoTrackStoreArrayName="RecoTracks")

                return path

            # This will be added to the "normal" path to evaluate the mva cuts. In most cases this is the same
            # as add_recording_modules (as the module parameters will be set automatically), but maybe you need
            # more here...
            def add_validation_modules(self, path):
                mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                                RecoTracksStoreArrayName='MCRecoTracks',
                                                WhichParticles=[])

                # Merge CDC and VXD tracks
                path.add_module('FilterBasedVXDCDCTrackMerger',
                                extrapolate=True,
                                CDCRecoTrackStoreArrayName="CDCRecoTracks",
                                VXDRecoTrackStoreArrayName="VXDRecoTracks",
                                MergedRecoTrackStoreArrayName="PrefitRecoTracks")

                path.add_module("SetupGenfitExtrapolation")
                path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")
                path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")
                path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
                                outputRecoTracksStoreArrayName="RecoTracks")

                # We need to include the matching ourselves, as we already have a matching algorithm in place
                path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                                prRecoTracksStoreArrayName="RecoTracks",
                                UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)

                return path

    A short usage sketch is given at the end of this module.
    """

    def __init__(self, run_class, use_jupyter=True):
        """Constructor"""

        #: cached copy of the run class
        self.run_class = run_class
        #: cached flag to use jupyter notebook
        self.use_jupyter = use_jupyter

        #: cached name of the output file
        self.recording_file_name = self.run_class.recording_module + ".root"

        #: cached path without extension of the output file
        self.file_name_path, ext = os.path.splitext(self.recording_file_name)

        #: cached name of the training file
        self.training_file_name = self.file_name_path + "Training" + ext
        #: cached path with extension of the testing-output file
        self.test_file_name = self.file_name_path + "Testing" + ext

        #: cached identifier
        self.identifier_name = "FastBDT.weights.xml"
        #: cached name of the output PDF file
        self.evaluation_file_name = self.identifier_name + ".pdf"

        #: cached path with extension of the testing-export file
        self.expert_file_name = self.file_name_path + "TestingExport" + ext

        #: cached path of the weight input data
        self.weight_data_location = Belle2.FileSystem.findFile(
            os.path.join("tracking/data", self.run_class.weight_data_location))

    def train(self):
        """Record a training file, split it in two parts and call the training method of the mva package"""
        if not os.path.exists(self.recording_file_name):
            self._create_records_file()

        if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
            self._write_train_and_test_files()

        self._call_training_routine()

    def evaluate_tracking(self):
        """
        Use the trained weight file and run the path again with different mva cuts. The results are
        validated using the normal tracking validation modules.
        """
        copy(self.identifier_name, self.weight_data_location)

        try:
            os.mkdir("results")
        except FileExistsError:
            pass

        def create_path(mva_cut):
            class ValidationRun(self.run_class, TrackingValidationRun):

                def finder_module(self, path):
                    self.add_validation_modules(path)

                    if mva_cut != 999:
                        adjust_module(path, self.recording_module,
                                      **{self.recording_parameter + "Parameters": {"cut": mva_cut},
                                         self.recording_parameter: "mva"})
                    else:
                        adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})

                output_file_name = "results/validation_{mva_cut}.root".format(mva_cut=mva_cut)

            run = ValidationRun()

            if not os.path.exists(run.output_file_name):
                return {"path": run.create_path()}
            else:
                return {"path": None}

        assert self.use_jupyter

        calculations = handler.process_parameter_space(create_path, mva_cut=self.run_class.evaluation_cuts + [999])
        calculations.start()
        calculations.wait_for_end()

        return calculations

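    # Illustrative sketch of the evaluate_tracking() output (the "analyser" variable is an
    # assumption, not part of this file): one tracking-validation ROOT file is written per
    # cut, and the sentinel cut 999 switches the filter to "truth".
    #
    #     analyser.evaluate_tracking()
    #     # -> results/validation_0.1.root, ..., results/validation_999.root
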
    def evaluate_classification(self):
        """
        Evaluate the classification power on the test data set and produce a PDF.
        """
        if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
            self._call_evaluation_routine()
            self._call_expert_routine()

        df = uproot.concatenate(self.expert_file_name, library='pd').merge(
            uproot.concatenate(self.test_file_name, library='pd'),
            left_index=True, right_index=True)

        if self.use_jupyter:
            from IPython.display import display
            display(PDF(self.evaluation_file_name, size=(800, 800)))

        return df

    def _call_training_routine(self):
        """Call the mva training routine on the training file"""
        try:
            check_output(["trackfindingcdc_teacher", self.training_file_name])
        except CalledProcessError as e:
            raise RuntimeError(e.output)

    def _write_train_and_test_files(self):
        """Split the recorded file into two halves (training and test sample) and write them back"""
        # TODO: This seems to reorder the columns...
        df = uproot.concatenate(self.recording_file_name, library='pd')
        mask = np.random.rand(len(df)) < 0.5
        training_sample = df[mask]
        test_sample = df[~mask]

        with uproot.recreate(self.training_file_name) as outfile:
            outfile["records"] = training_sample
        with uproot.recreate(self.test_file_name) as outfile:
            outfile["records"] = test_sample

    def _create_records_file(self):
        """
        Create a path using the settings of the run_class and process it.
        This will create a ROOT file with the recorded data.
        """
        recording_file_name = self.recording_file_name

        class RecordRun(self.run_class, ReadOrGenerateEventsRun):

            def create_path(self):
                path = ReadOrGenerateEventsRun.create_path(self)

                self.add_recording_modules(path)

                adjust_module(path, self.recording_module,
                              **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
                                 self.recording_parameter: "recording"})

                return path

        run = RecordRun()
        path = run.create_path()

        if self.use_jupyter:
            calculation = handler.process(path)
            calculation.start()
            calculation.wait_for_end()

            return calculation
        else:
            run.execute()

    def _call_expert_routine(self):
        """Call the mva expert"""
        try:
            check_output(["basf2_mva_expert",
                          "--identifiers", self.identifier_name, self.weight_data_location,
                          "--datafiles", self.test_file_name,
                          "--outputfile", self.expert_file_name,
                          "--treename", "records"])
        except CalledProcessError as e:
            raise RuntimeError(e.output)

    def _call_evaluation_routine(self):
        """Call the mva evaluation routine"""
        try:
            check_output(["basf2_mva_evaluate.py",
                          "--identifiers", self.identifier_name, self.weight_data_location,
                          "--train_datafiles", self.training_file_name,
                          "--datafiles", self.test_file_name,
                          "--treename", "records",
                          "--outputfile", self.evaluation_file_name],
                         stderr=STDOUT)
        except CalledProcessError as e:
            raise RuntimeError(e.output)
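

# A minimal end-to-end usage sketch, following the class docstring example above. The run
# class "TestClass", its weight_data_location value and the analyser variable are
# assumptions for illustration, not part of this module:
#
#     class TestClass:
#         recording_module = "FilterBasedVXDCDCTrackMerger"
#         recording_parameter = "filter"
#         evaluation_cuts = [0.1, 0.2, 0.5]
#         weight_data_location = "FilterBasedVXDCDCTrackMerger.xml"  # assumed, relative to tracking/data
#         tracking_coverage = {'UsePXDHits': True, 'UseSVDHits': True, 'UseCDCHits': True}
#         fit_tracks = False
#         generator_module = "EvtGenInput"
#         # ... plus add_recording_modules / add_validation_modules as shown in the class docstring
#
#     analyser = MVATeacherAndAnalyser(TestClass, use_jupyter=True)
#     analyser.train()                         # record data, split into train/test, run trackfindingcdc_teacher
#     df = analyser.evaluate_classification()  # expert output merged with the test sample, plus evaluation PDF
#     analyser.evaluate_tracking()             # tracking validation for each cut in evaluation_cuts (+ truth)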