release-08-01-10/doxygen/analyse_8py_source.html

 from ipython_tools import handler

 import uproot

 import numpy as np

 import os.path

 from subprocess import check_output, CalledProcessError, STDOUT


 from shutil import copy


 from tracking.adjustments import adjust_module

 from tracking.run.event_generation import ReadOrGenerateEventsRun

 from tracking.validation.run import TrackingValidationRun


 from ROOT import Belle2


 class PDF:
     """
     Helper class to show a PDF file in a jupyter notebook.

     Instances provide ``_repr_html_`` and ``_repr_latex_`` methods that
     return markup embedding the stored file name.
     """

     def __init__(self, pdf, size=(600, 700)):
         """
         Show a PDF file.

         :param pdf: The filename of the PDF file.
         :param size: The size to use, as a ``(width, height)`` pair.
         """
         # cached copy of the pdf filename
         self.pdf = pdf
         # cached copy of the size
         self.size = size

     def _repr_html_(self):
         """Return an ``<iframe>`` element that embeds the PDF (HTML representation)."""
         # Function-scope import keeps the class self-contained.
         from html import escape

         # The file name is quoted and escaped so that names containing spaces
         # or HTML-special characters still yield a well-formed attribute.
         return '<iframe src="{0}" width={1[0]} height={1[1]}></iframe>'.format(
             escape(str(self.pdf), quote=True), self.size)

     def _repr_latex_(self):
         """Return an ``\\includegraphics`` command for the PDF (LaTeX representation)."""
         return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)


 class MVATeacherAndAnalyser:
     """
     Class for training and analysing a tracking module, which has a MVA filter in it.

     Works best, if you are on a jupyter notebook.

     You need to supply a run_class, which includes all needed settings, on how to
     train and execute the module. This class will be mixed in with the normal trackfindingcdc
     run classes, so you can add the setting (e.g. tracking_coverage etc.) as normal.

     One example is:

     class TestClass:
         # This module will be trained
         recording_module = "FilterBasedVXDCDCTrackMerger"
         # This is the name of the parameter of this module, which will be set to "mva" etc.
         recording_parameter = "filter"

         # These mva cuts will be tested during evaluation.
         evaluation_cuts = [0.1, 0.2, ...]

         tracking_coverage = {
             'UsePXDHits': True,
             'UseSVDHits': True,
             'UseCDCHits': True,
         }

         # Some options, which will control the run classes
         fit_tracks = False
         generator_module = "EvtGenInput"

         # This will be added to the "normal" path, to record the training data (you do not have to set the module to
         # recording, as this is done automatically).
         def add_recording_modules(self, path):
             mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                     RecoTracksStoreArrayName='MCRecoTracks',
                                     WhichParticles=[])

             path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                             prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
             path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                             prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)

             # Merge CDC and VXD tracks
             path.add_module('FilterBasedVXDCDCTrackMerger',
                             extrapolate=False,
                             CDCRecoTrackStoreArrayName="CDCRecoTracks",
                             VXDRecoTrackStoreArrayName="VXDRecoTracks",
                             MergedRecoTrackStoreArrayName="RecoTracks")

             return path

         # This will be added to the "normal" path, to evaluate the mva cuts. In most cases, this is the same as the
         # add_recording_modules (as the module parameters will be set automatically), but maybe you need
         # more here...
         def add_validation_modules(self, path):
             mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                     RecoTracksStoreArrayName='MCRecoTracks',
                                     WhichParticles=[])

             # Merge CDC and VXD tracks
             path.add_module('FilterBasedVXDCDCTrackMerger',
                             extrapolate=True,
                             CDCRecoTrackStoreArrayName="CDCRecoTracks",
                             VXDRecoTrackStoreArrayName="VXDRecoTracks",
                             MergedRecoTrackStoreArrayName="PrefitRecoTracks")

             path.add_module("SetupGenfitExtrapolation")

             path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")

             path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")

             path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
                             outputRecoTracksStoreArrayName="RecoTracks")

             # We need to include the matching ourselves, as we have already a matching algorithm in place
             path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                             prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)

             return path
     """

     def __init__(self, run_class, use_jupyter=True):
         """
         Constructor: derive all file names from the run class.

         :param run_class: Class providing ``recording_module``, ``recording_parameter``,
             ``evaluation_cuts``, ``weight_data_location`` and the ``add_recording_modules`` /
             ``add_validation_modules`` methods used by this class.
         :param use_jupyter: If true, paths are processed through the ``ipython_tools`` handler.
             ``evaluate_tracking`` requires this to be true.
         """
         # cached copy of the run class
         self.run_class = run_class
         # cached flag to use jupyter notebook
         self.use_jupyter = use_jupyter

         # cached name of the output file
         self.recording_file_name = self.run_class.recording_module + ".root"

         # cached path without extension of the output file
         self.file_name_path, ext = os.path.splitext(self.recording_file_name)

         # cached path with extension of the training-output file
         self.training_file_name = self.file_name_path + "Training" + ext
         # cached path with extension of the testing-output file
         self.test_file_name = self.file_name_path + "Testing" + ext

         # cached identifier
         self.identifier_name = "FastBDT.weights.xml"
         # cached name of the output PDF file
         self.evaluation_file_name = self.identifier_name + ".pdf"

         # cached path with extension of the testing-export file
         self.expert_file_name = self.file_name_path + "TestingExport" + ext

         # cached path of the weight input data
         self.weight_data_location = Belle2.FileSystem.findFile(
             os.path.join("tracking/data", self.run_class.weight_data_location))

     def train(self):
         """Record a training file, split it in two parts and call the training method of the mva package"""
         # Each step is skipped when its output file is already on disk.
         if not os.path.exists(self.recording_file_name):
             self._create_records_file()

         if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
             self._write_train_and_test_files()

         self._call_training_routine()

     def evaluate_tracking(self):
         """
         Use the trained weight file and call the path again using different mva cuts. Validation using the
         normal tracking validation modules.

         :return: The calculations object returned by ``handler.process_parameter_space``, after it finished.
         :raises AssertionError: If ``use_jupyter`` is false.
         """
         # This cut value is not a real cut: it switches the module to its "truth" filter instead of "mva".
         truth_cut = 999

         copy(self.identifier_name, self.weight_data_location)

         os.makedirs("results", exist_ok=True)

         def create_path(mva_cut):
             class ValidationRun(self.run_class, TrackingValidationRun):

                 def finder_module(self, path):
                     self.add_validation_modules(path)

                     if mva_cut != truth_cut:
                         adjust_module(path, self.recording_module,
                                       **{self.recording_parameter + "Parameters": {"cut": mva_cut},
                                          self.recording_parameter: "mva"})
                     else:
                         adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})

                 output_file_name = "results/validation_{mva_cut}.root".format(mva_cut=mva_cut)

             run = ValidationRun()

             # Cuts whose validation output already exists get no path.
             if not os.path.exists(run.output_file_name):
                 return {"path": run.create_path()}
             else:
                 return {"path": None}

         assert self.use_jupyter, "evaluate_tracking requires use_jupyter=True"

         # list() allows evaluation_cuts to be given as a tuple as well as a list.
         calculations = handler.process_parameter_space(
             create_path, mva_cut=list(self.run_class.evaluation_cuts) + [truth_cut])
         calculations.start()
         calculations.wait_for_end()

         return calculations

     def evaluate_classification(self):
         """
         Evaluate the classification power on the test data set and produce a PDF.

         :return: The expert output merged index-wise with the test sample.
         """
         if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
             self._call_evaluation_routine()
             self._call_expert_routine()

         expert_df = uproot.concatenate(self.expert_file_name, library='pd')
         test_df = uproot.concatenate(self.test_file_name, library='pd')
         df = expert_df.merge(test_df, left_index=True, right_index=True)

         if self.use_jupyter:
             from IPython.display import display
             display(PDF(self.evaluation_file_name, size=(800, 800)))

         return df

     @staticmethod
     def _run_command(command):
         """
         Run an external command and turn a non-zero exit status into a RuntimeError.

         :param command: The argument list handed to ``check_output``.
         :raises RuntimeError: If the command fails; the message is its captured output.
         """
         try:
             # stderr is merged into stdout so a failure message contains the error text.
             check_output(command, stderr=STDOUT)
         except CalledProcessError as e:
             raise RuntimeError(e.output) from e

     def _call_training_routine(self):
         """Call the mva training routine in the train file"""
         self._run_command(["trackfindingcdc_teacher", self.training_file_name])

     def _write_train_and_test_files(self):
         """Split the recorded file into two halves: training and test file and write it back"""
         # TODO: This seems to reorder the columns...
         df = uproot.concatenate(self.recording_file_name, library='pd')
         # Each row goes to the training sample with probability 0.5, otherwise to the test sample.
         mask = np.random.rand(len(df)) < 0.5
         training_sample = df[mask]
         test_sample = df[~mask]

         with uproot.recreate(self.training_file_name) as outfile:
             outfile["records"] = training_sample
         with uproot.recreate(self.test_file_name) as outfile:
             outfile["records"] = test_sample

     def _create_records_file(self):
         """
         Create a path using the settings of the run_class and process it.
         This will create a ROOT file with the recorded data.

         :return: The finished calculation when ``use_jupyter`` is true, otherwise ``None``.
         """
         # Local copy, because ``self`` inside RecordRun.create_path refers to the run object.
         recording_file_name = self.recording_file_name

         class RecordRun(self.run_class, ReadOrGenerateEventsRun):

             def create_path(self):
                 path = ReadOrGenerateEventsRun.create_path(self)

                 self.add_recording_modules(path)

                 adjust_module(path, self.recording_module,
                               **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
                                  self.recording_parameter: "recording"})

                 return path

         run = RecordRun()
         path = run.create_path()

         if self.use_jupyter:
             calculation = handler.process(path)
             calculation.start()
             calculation.wait_for_end()

             return calculation
         else:
             run.execute()

     def _call_expert_routine(self):
         """Call the mva expert"""
         self._run_command(["basf2_mva_expert",
                            "--identifiers", self.identifier_name, self.weight_data_location,
                            "--datafiles", self.test_file_name,
                            "--outputfile", self.expert_file_name,
                            "--treename", "records"])

     def _call_evaluation_routine(self):
         """Call the mva evaluation routine"""
         self._run_command(["basf2_mva_evaluate.py",
                            "--identifiers", self.identifier_name, self.weight_data_location,
                            "--train_datafiles", self.training_file_name,
                            "--datafiles", self.test_file_name,
                            "--treename", "records",
                            "--outputfile", self.evaluation_file_name])

Belle2::FileSystem::findFile
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:148

analyse.MVATeacherAndAnalyser
Definition: analyse.py:49

analyse.MVATeacherAndAnalyser.run_class
run_class
cached copy of the run class
Definition: analyse.py:136

analyse.MVATeacherAndAnalyser.evaluation_file_name
evaluation_file_name
cached name of the output PDF file
Definition: analyse.py:154

analyse.MVATeacherAndAnalyser._call_training_routine
def _call_training_routine(self)
Definition: analyse.py:238

analyse.MVATeacherAndAnalyser.evaluate_classification
def evaluate_classification(self)
Definition: analyse.py:215

analyse.MVATeacherAndAnalyser.train
def train(self)
Definition: analyse.py:163

analyse.MVATeacherAndAnalyser.test_file_name
test_file_name
cached path with extension of the testing-output file
Definition: analyse.py:149

analyse.MVATeacherAndAnalyser.use_jupyter
use_jupyter
cached flag to use jupyter notebook
Definition: analyse.py:138

analyse.MVATeacherAndAnalyser._call_expert_routine
def _call_expert_routine(self)
Definition: analyse.py:290

analyse.MVATeacherAndAnalyser.recording_file_name
recording_file_name
cached name of the output file
Definition: analyse.py:141

analyse.MVATeacherAndAnalyser.weight_data_location
weight_data_location
cached path of the weight input data
Definition: analyse.py:160

analyse.MVATeacherAndAnalyser.expert_file_name
expert_file_name
cached path with extension of the testing-export file
Definition: analyse.py:157

analyse.MVATeacherAndAnalyser.training_file_name
training_file_name
cached path with extension of the training-output file
Definition: analyse.py:147

analyse.MVATeacherAndAnalyser.__init__
def __init__(self, run_class, use_jupyter=True)
Definition: analyse.py:132

analyse.MVATeacherAndAnalyser._create_records_file
def _create_records_file(self)
Definition: analyse.py:258

analyse.MVATeacherAndAnalyser.evaluate_tracking
def evaluate_tracking(self)
Definition: analyse.py:173

analyse.MVATeacherAndAnalyser.identifier_name
identifier_name
cached identifier
Definition: analyse.py:152

analyse.MVATeacherAndAnalyser._write_train_and_test_files
def _write_train_and_test_files(self)
Definition: analyse.py:245

analyse.MVATeacherAndAnalyser._call_evaluation_routine
def _call_evaluation_routine(self)
Definition: analyse.py:301

analyse.PDF
Definition: analyse.py:24

analyse.PDF._repr_html_
def _repr_html_(self)
Definition: analyse.py:40

analyse.PDF.pdf
pdf
cached copy of the pdf filename
Definition: analyse.py:36

analyse.PDF.size
size
cached copy of the size
Definition: analyse.py:38

analyse.PDF.__init__
def __init__(self, pdf, size=(600, 700))
Definition: analyse.py:29

analyse.PDF._repr_latex_
def _repr_latex_(self)
Definition: analyse.py:44

tracking.run.event_generation.ReadOrGenerateEventsRun
Definition: event_generation.py:35

tracking.validation.run.TrackingValidationRun
Definition: run.py:25

Belle2::merge
std::vector< std::vector< double > > merge(std::vector< std::vector< std::vector< double >>> toMerge)
merge { vector<double> a, vector<double> b} into {a, b}
Definition: tools.h:41

display
Definition: display.py:1

tracking.adjustments
Definition: adjustments.py:1

tracking.run.event_generation
Definition: event_generation.py:1

tracking.validation.run
Definition: run.py:1