Belle II Software  release-06-02-00
analyse.py
1 
8 
9 from ipython_tools import handler
10 from root_pandas import read_root, to_root
11 import numpy as np
12 import os.path
13 from subprocess import check_output, CalledProcessError, STDOUT
14 
15 from shutil import copy
16 
17 from tracking.adjustments import adjust_module
18 from tracking.run.event_generation import ReadOrGenerateEventsRun
19 from tracking.validation.run import TrackingValidationRun
20 
21 from ROOT import Belle2
22 
23 
class PDF:
    """
    Helper class to show a PDF file in a jupyter notebook.

    Jupyter picks up ``_repr_html_`` / ``_repr_latex_`` automatically when an
    instance is displayed, so no explicit rendering call is needed.
    """

    def __init__(self, pdf, size=(600, 700)):
        """
        Show a PDF file.

        :param pdf: The filename of the PDF file.
        :param size: The ``(width, height)`` pair used for the embedding iframe.
        """
        #: cached copy of the pdf filename
        self.pdf = pdf
        #: cached copy of the size
        self.size = size

    def _repr_html_(self):
        """HTML representation: an iframe pointing at the PDF file."""
        return '<iframe src={0} width={1[0]} height={1[1]}></iframe>'.format(self.pdf, self.size)

    def _repr_latex_(self):
        """LaTeX representation: an ``\\includegraphics`` of the PDF file."""
        # The triple braces produce a literal ``{filename}`` after formatting.
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)
47 
48 
50  """
51  Class for training and analysing a tracking module, which has a MVA filter in it.
52 
53  Works best if you are in a Jupyter notebook.
54 
55  You need to supply a run_class, which includes all needed settings, on how to
56  train and execute the module. This class will be mixed in with the normal trackfindingcdc
57  run classes, so you can add the setting (e.g. tracking_coverage etc.) as normal.
58 
59  One example is:
60 
61  class TestClass:
62  # This module will be trained
63  recording_module = "FilterBasedVXDCDCTrackMerger"
64  # This is the name of the parameter of this module, which will be set to "mva" etc.
65  recording_parameter = "filter"
66 
67  # These mva cuts will be tested during evaluation.
68  evaluation_cuts = [0.1, 0.2, ...]
69 
70  tracking_coverage = {
71  'UsePXDHits': True,
72  'UseSVDHits': True,
73  'UseCDCHits': True,
74  }
75 
76  # Some options, which will control the run classes
77  fit_tracks = False
78  generator_module = "EvtGenInput"
79 
80  # This will be added to the "normal" path, to record the training data (you do not have to set the module to
81  # recording, as this is done automatically).
82  def add_recording_modules(self, path):
83  mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
84  RecoTracksStoreArrayName='MCRecoTracks',
85  WhichParticles=[])
86 
87  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
88  prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
89  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
90  prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)
91 
92  # Merge CDC and VXD tracks
93  path.add_module('FilterBasedVXDCDCTrackMerger',
94  extrapolate=False,
95  CDCRecoTrackStoreArrayName="CDCRecoTracks",
96  VXDRecoTrackStoreArrayName="VXDRecoTracks",
97  MergedRecoTrackStoreArrayName="RecoTracks")
98 
99  return path
100 
101  # This will be added to the "normal" path, to evaluate the mva cuts. In most cases, this is the same as the
102  # add_recording_modules (as the module parameters will be set automatically), but maybe you need
103  # more here...
104  def add_validation_modules(self, path):
105  mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
106  RecoTracksStoreArrayName='MCRecoTracks',
107  WhichParticles=[])
108 
109  # Merge CDC and VXD tracks
110  path.add_module('FilterBasedVXDCDCTrackMerger',
111  extrapolate=True,
112  CDCRecoTrackStoreArrayName="CDCRecoTracks",
113  VXDRecoTrackStoreArrayName="VXDRecoTracks",
114  MergedRecoTrackStoreArrayName="PrefitRecoTracks")
115 
116  path.add_module("SetupGenfitExtrapolation")
117 
118  path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")
119 
120  path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")
121 
122  path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
123  outputRecoTracksStoreArrayName="RecoTracks")
124 
125  # We need to include the matching ourselves, as we have already a matching algorithm in place
126  path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
127  prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)
128 
129  return path
130  """
131 
def __init__(self, run_class, use_jupyter=True):
    """Constructor

    Derives every file name used by the other methods from the run class.

    :param run_class: Settings class mixed into the run classes; this
        constructor reads its ``recording_module`` and
        ``weight_data_location`` attributes.
    :param use_jupyter: If true, processing goes through the ``ipython_tools``
        handler.
    """
    #: cached copy of the run class
    self.run_class = run_class
    #: cached flag to use jupyter notebook
    self.use_jupyter = use_jupyter

    #: cached name of the output file
    self.recording_file_name = self.run_class.recording_module + ".root"

    #: cached path without extension of the output file
    self.file_name_path, ext = os.path.splitext(self.recording_file_name)

    #: cached path with extension of the training-output file
    self.training_file_name = self.file_name_path + "Training" + ext
    #: cached path with extension of the testing-output file
    self.test_file_name = self.file_name_path + "Testing" + ext

    #: cached identifier
    self.identifier_name = "FastBDT.weights.xml"
    #: cached name of the output PDF file
    self.evaluation_file_name = self.identifier_name + ".pdf"

    #: cached path with extension of the testing-export file
    self.expert_file_name = self.file_name_path + "TestingExport" + ext

    #: cached path of the weight input data
    self.weight_data_location = Belle2.FileSystem.findFile(
        os.path.join("tracking/data", self.run_class.weight_data_location))
162 
def train(self):
    """Record a training file, split it in two parts and call the training method of the mva package

    Each preparatory step is skipped when its output file already exists, so
    a rerun only repeats the missing steps. The training itself always runs.
    """
    if not os.path.exists(self.recording_file_name):
        self._create_records_file()

    # Both halves are needed; if either is missing the split is redone.
    if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
        self._write_train_and_test_files()

    self._call_training_routine()
172 
def evaluate_tracking(self):
    """
    Use the trained weight file and call the path again using different mva cuts. Validation using the
    normal tracking validation modules.

    :return: The calculations object returned by ``handler.process_parameter_space``,
        after it has been started and waited for.
    :raises AssertionError: If ``use_jupyter`` is false; this method always goes
        through the ``ipython_tools`` handler.
    """
    # Put the freshly trained weight file where the module looks it up.
    copy(self.identifier_name, self.weight_data_location)

    try:
        os.mkdir("results")
    except FileExistsError:
        # The output directory is left over from an earlier run; reuse it.
        pass

    def create_path(mva_cut):
        """Build the validation path for one cut value (999 selects the truth filter)."""
        class ValidationRun(self.run_class, TrackingValidationRun):

            def finder_module(self, path):
                self.add_validation_modules(path)

                if mva_cut != 999:
                    adjust_module(path, self.recording_module,
                                  **{self.recording_parameter + "Parameters": {"cut": mva_cut},
                                     self.recording_parameter: "mva"})
                else:
                    adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})

            output_file_name = "results/validation_{mva_cut}.root".format(mva_cut=mva_cut)

        run = ValidationRun()

        # Skip cuts whose validation output already exists.
        if not os.path.exists(run.output_file_name):
            return {"path": run.create_path()}
        else:
            return {"path": None}

    assert self.use_jupyter

    # 999 is appended so the truth filter is evaluated alongside the mva cuts.
    calculations = handler.process_parameter_space(create_path, mva_cut=self.run_class.evaluation_cuts + [999])
    calculations.start()
    calculations.wait_for_end()

    return calculations
214 
def evaluate_classification(self):
    """
    Evaluate the classification power on the test data set and produce a PDF.

    :return: The expert output merged index-wise with the test sample.
    """
    # Both routines are rerun if either of their outputs is missing.
    if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
        self._call_evaluation_routine()
        self._call_expert_routine()

    df = read_root(self.expert_file_name).merge(read_root(self.test_file_name), left_index=True, right_index=True)

    if self.use_jupyter:
        # Imported here so that IPython is only needed in notebook mode.
        from IPython.display import display
        display(PDF(self.evaluation_file_name, size=(800, 800)))

    return df
230 
def _call_training_routine(self):
    """Call the mva training routine in the train file

    :raises RuntimeError: If the ``trackfindingcdc_teacher`` command exits
        with a non-zero status; the message is the captured output.
    """
    try:
        check_output(["trackfindingcdc_teacher", self.training_file_name])
    except CalledProcessError as e:
        # Chain the cause so the failed command and exit code stay in the traceback.
        raise RuntimeError(e.output) from e
237 
def _write_train_and_test_files(self):
    """Split the recorded file into two halves: training and test file and write it back

    Each row goes to the training sample with probability 0.5, so the two
    files are only approximately equal in size. The split is not seeded.
    """
    # TODO: This seems to reorder the columns...
    df = read_root(self.recording_file_name)
    mask = np.random.rand(len(df)) < 0.5
    training_sample = df[mask]
    test_sample = df[~mask]

    to_root(training_sample, self.training_file_name, tree_key="records")
    to_root(test_sample, self.test_file_name, tree_key="records")
248 
def _create_records_file(self):
    """
    Create a path using the settings of the run_class and process it.
    This will create a ROOT file with the recorded data.

    :return: The finished calculation when ``use_jupyter`` is true, otherwise None.
    """
    # Bound to a local because ``self`` inside RecordRun refers to the run, not to this object.
    recording_file_name = self.recording_file_name

    class RecordRun(self.run_class, ReadOrGenerateEventsRun):

        def create_path(self):
            path = ReadOrGenerateEventsRun.create_path(self)

            self.add_recording_modules(path)

            # Switch the module to recording mode and point it at the output file.
            adjust_module(path, self.recording_module,
                          **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
                             self.recording_parameter: "recording"})

            return path

    run = RecordRun()
    # NOTE(review): the path is built even when use_jupyter is false, where only
    # run.execute() is called - confirm whether that branch needs it.
    path = run.create_path()

    if self.use_jupyter:
        calculation = handler.process(path)
        calculation.start()
        calculation.wait_for_end()

        return calculation
    else:
        run.execute()
280 
def _call_expert_routine(self):
    """Call the mva expert

    :raises RuntimeError: If the ``basf2_mva_expert`` command exits with a
        non-zero status; the message is the captured output.
    """
    try:
        check_output(["basf2_mva_expert",
                      "--identifiers", self.identifier_name, self.weight_data_location,
                      "--datafiles", self.test_file_name,
                      "--outputfile", self.expert_file_name,
                      "--treename", "records"])
    except CalledProcessError as e:
        # Chain the cause so the failed command and exit code stay in the traceback.
        raise RuntimeError(e.output) from e
291 
def _call_evaluation_routine(self):
    """Call the mva evaluation routine

    :raises RuntimeError: If the ``basf2_mva_evaluate.py`` command exits with a
        non-zero status; the message is the captured stdout and stderr.
    """
    try:
        check_output(["basf2_mva_evaluate.py",
                      "--identifiers", self.identifier_name, self.weight_data_location,
                      "--train_datafiles", self.training_file_name,
                      "--datafiles", self.test_file_name,
                      "--treename", "records",
                      "--outputfile", self.evaluation_file_name],
                     stderr=STDOUT)
    except CalledProcessError as e:
        # Chain the cause so the failed command and exit code stay in the traceback.
        raise RuntimeError(e.output) from e
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:145
run_class
cached copy of the run class
Definition: analyse.py:136
evaluation_file_name
cached name of the output PDF file
Definition: analyse.py:154
def _call_training_routine(self)
Definition: analyse.py:231
def evaluate_classification(self)
Definition: analyse.py:215
test_file_name
cached path with extension of the testing-output file
Definition: analyse.py:149
use_jupyter
cached flag to use jupyter notebook
Definition: analyse.py:138
recording_file_name
cached name of the output file
Definition: analyse.py:141
weight_data_location
cached path of the weight input data
Definition: analyse.py:160
expert_file_name
cached path with extension of the testing-export file
Definition: analyse.py:157
training_file_name
cached path with extension of the training-output file
Definition: analyse.py:147
def __init__(self, run_class, use_jupyter=True)
Definition: analyse.py:132
identifier_name
cached identifier
Definition: analyse.py:152
def _write_train_and_test_files(self)
Definition: analyse.py:238
def _call_evaluation_routine(self)
Definition: analyse.py:292
def _repr_html_(self)
Definition: analyse.py:40
pdf
cached copy of the pdf filename
Definition: analyse.py:36
size
cached copy of the size
Definition: analyse.py:38
def __init__(self, pdf, size=(600, 700))
Definition: analyse.py:29
def _repr_latex_(self)
Definition: analyse.py:44
std::vector< std::vector< double > > merge(std::vector< std::vector< std::vector< double >>> toMerge)
merge { vector<double> a, vector<double> b} into {a, b}
Definition: tools.h:41