Belle II Software development
analyse.py
from ipython_tools import handler
import uproot
import numpy as np
import os.path
from subprocess import check_output, CalledProcessError, STDOUT

from shutil import copy

from tracking.adjustments import adjust_module
from tracking.run.event_generation import ReadOrGenerateEventsRun
from tracking.validation.run import TrackingValidationRun

from ROOT import Belle2
# @cond internal_test

class PDF:
    """
    Helper class to show a PDF file in a jupyter notebook.
    """

    def __init__(self, pdf, size=(600, 700)):
        """
        Show a PDF file.
        :param pdf: The filename of the PDF file.
        :param size: The size to use.
        """
        ## The filename of the PDF file to show
        self.pdf = pdf
        ## The (width, height) used for displaying
        self.size = size

    def _repr_html_(self):
        """HTML representation"""
        return f'<iframe src="{self.pdf}" width={self.size[0]} height={self.size[1]}></iframe>'

    def _repr_latex_(self):
        """LaTeX representation"""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)

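# Usage sketch for the PDF helper above (assuming a file "evaluation.pdf" exists
# in the working directory): returning the object as the last expression of a
# notebook cell renders the file inline via _repr_html_(), e.g.
#
#     PDF("evaluation.pdf", size=(800, 800))
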
class MVATeacherAndAnalyser:
    """
    Class for training and analysing a tracking module which has an MVA filter in it.

    Works best if you are in a jupyter notebook.

    You need to supply a run_class, which includes all settings needed to
    train and execute the module. This class will be mixed in with the normal trackfindingcdc
    run classes, so you can add the settings (e.g. tracking_coverage etc.) as normal.

    One example is:

    class TestClass:
        # This module will be trained
        recording_module = "FilterBasedVXDCDCTrackMerger"
        # This is the name of the parameter of this module, which will be set to "mva" etc.
        recording_parameter = "filter"

        # These mva cuts will be tested during evaluation.
        evaluation_cuts = [0.1, 0.2, ...]

        tracking_coverage = {
            'UsePXDHits': True,
            'UseSVDHits': True,
            'UseCDCHits': True,
        }

        # Some options which control the run classes
        fit_tracks = False
        generator_module = "EvtGenInput"

        # This will be added to the "normal" path to record the training data (you do not have to set the module to
        # recording, as this is done automatically).
        def add_recording_modules(self, path):
            mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                            RecoTracksStoreArrayName='MCRecoTracks',
                                            WhichParticles=[])

            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="CDCRecoTracks", UseCDCHits=True, UsePXDHits=False, UseSVDHits=False)
            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="VXDRecoTracks", UseCDCHits=False, UsePXDHits=True, UseSVDHits=True)

            # Merge CDC and VXD tracks
            path.add_module('FilterBasedVXDCDCTrackMerger',
                            extrapolate=False,
                            CDCRecoTrackStoreArrayName="CDCRecoTracks",
                            VXDRecoTrackStoreArrayName="VXDRecoTracks",
                            MergedRecoTrackStoreArrayName="RecoTracks")

            return path

        # This will be added to the "normal" path to evaluate the mva cuts. In most cases this is the same as
        # add_recording_modules (as the module parameters will be set automatically), but maybe you need
        # more here...
        def add_validation_modules(self, path):
            mctrackfinder = path.add_module('TrackFinderMCTruthRecoTracks',
                                            RecoTracksStoreArrayName='MCRecoTracks',
                                            WhichParticles=[])

            # Merge CDC and VXD tracks
            path.add_module('FilterBasedVXDCDCTrackMerger',
                            extrapolate=True,
                            CDCRecoTrackStoreArrayName="CDCRecoTracks",
                            VXDRecoTrackStoreArrayName="VXDRecoTracks",
                            MergedRecoTrackStoreArrayName="PrefitRecoTracks")

            path.add_module("SetupGenfitExtrapolation")

            path.add_module("DAFRecoFitter", recoTracksStoreArrayName="PrefitRecoTracks")

            path.add_module("TrackCreator", recoTrackColName="PrefitRecoTracks")

            path.add_module("FittedTracksStorer", inputRecoTracksStoreArrayName="PrefitRecoTracks",
                            outputRecoTracksStoreArrayName="RecoTracks")

            # We need to include the matching ourselves, as we already have a matching algorithm in place
            path.add_module('MCRecoTracksMatcher', mcRecoTracksStoreArrayName="MCRecoTracks",
                            prRecoTracksStoreArrayName="RecoTracks", UseCDCHits=True, UsePXDHits=True, UseSVDHits=True)

            return path
    """

    def __init__(self, run_class, use_jupyter=True):
        """Constructor"""

        ## cached copy of the run class
        self.run_class = run_class
        ## cached flag to use jupyter notebook
        self.use_jupyter = use_jupyter

        ## cached name of the output file
        self.recording_file_name = self.run_class.recording_module + ".root"

        ## cached path without extension of the output file
        self.file_name_path, ext = os.path.splitext(self.recording_file_name)

        ## cached path with extension of the training-output file
        self.training_file_name = self.file_name_path + "Training" + ext
        ## cached path with extension of the testing-output file
        self.test_file_name = self.file_name_path + "Testing" + ext

        ## cached identifier
        self.identifier_name = "FastBDT.weights.xml"
        ## cached name of the output PDF file
        self.evaluation_file_name = self.identifier_name + ".pdf"

        ## cached path with extension of the testing-export file
        self.expert_file_name = self.file_name_path + "TestingExport" + ext

        ## cached path of the weight input data
        self.weight_data_location = Belle2.FileSystem.findFile(
            os.path.join("tracking/data", self.run_class.weight_data_location))

    def train(self):
        """Record a training file, split it into two parts and call the training method of the mva package"""
        if not os.path.exists(self.recording_file_name):
            self._create_records_file()

        if not os.path.exists(self.training_file_name) or not os.path.exists(self.test_file_name):
            self._write_train_and_test_files()

        self._call_training_routine()

    def evaluate_tracking(self):
        """
        Use the trained weight file and run the path again with different mva cuts.
        The validation is done with the normal tracking validation modules.
        """
        copy(self.identifier_name, self.weight_data_location)

        try:
            os.mkdir("results")
        except FileExistsError:
            pass

        def create_path(mva_cut):
            class ValidationRun(self.run_class, TrackingValidationRun):

                def finder_module(self, path):
                    self.add_validation_modules(path)

                    # The cut value 999 serves as a placeholder: instead of applying an mva
                    # cut, the module is run with the "truth" filter as a reference.
                    if mva_cut != 999:
                        adjust_module(path, self.recording_module,
                                      **{self.recording_parameter + "Parameters": {"cut": mva_cut},
                                         self.recording_parameter: "mva"})
                    else:
                        adjust_module(path, self.recording_module, **{self.recording_parameter: "truth"})

                output_file_name = f"results/validation_{mva_cut}.root"

            run = ValidationRun()

            if not os.path.exists(run.output_file_name):
                return {"path": run.create_path()}
            else:
                return {"path": None}

        assert self.use_jupyter

        calculations = handler.process_parameter_space(create_path, mva_cut=self.run_class.evaluation_cuts + [999])
        calculations.start()
        calculations.wait_for_end()

        return calculations

    def evaluate_classification(self):
        """
        Evaluate the classification power on the test data set and produce a PDF.
        """
        if not os.path.exists(self.expert_file_name) or not os.path.exists(self.evaluation_file_name):
            self._call_evaluation_routine()
            self._call_expert_routine()

        df = uproot.concatenate(self.expert_file_name, library='pd').merge(
            uproot.concatenate(self.test_file_name, library='pd'),
            left_index=True, right_index=True)

        if self.use_jupyter:
            from IPython.display import display
            display(PDF(self.evaluation_file_name, size=(800, 800)))

        return df

    def _call_training_routine(self):
        """Call the mva training routine on the training file"""
        try:
            check_output(["trackfindingcdc_teacher", self.training_file_name])
        except CalledProcessError as e:
            raise RuntimeError(e.output)

    def _write_train_and_test_files(self):
        """Split the recorded file into two halves, a training and a test file, and write them back"""
        # TODO: This seems to reorder the columns...
        df = uproot.concatenate(self.recording_file_name, library='pd')
        mask = np.random.rand(len(df)) < 0.5
        training_sample = df[mask]
        test_sample = df[~mask]

        with uproot.recreate(self.training_file_name) as outfile:
            outfile["records"] = training_sample
        with uproot.recreate(self.test_file_name) as outfile:
            outfile["records"] = test_sample

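    # Note on the splitting above: np.random.rand(len(df)) < 0.5 builds a random
    # boolean mask, so roughly half of the recorded candidates end up in the
    # training sample and the complementary half (~mask) in the test sample.
    # The split is not seeded, so repeated calls produce different partitions.
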
    def _create_records_file(self):
        """
        Create a path using the settings of the run_class and process it.
        This will create a ROOT file with the recorded data.
        """
        recording_file_name = self.recording_file_name

        class RecordRun(self.run_class, ReadOrGenerateEventsRun):

            def create_path(self):
                path = ReadOrGenerateEventsRun.create_path(self)

                self.add_recording_modules(path)

                adjust_module(path, self.recording_module,
                              **{self.recording_parameter + "Parameters": {"rootFileName": recording_file_name},
                                 self.recording_parameter: "recording"})

                return path

        run = RecordRun()
        path = run.create_path()

        if self.use_jupyter:
            calculation = handler.process(path)
            calculation.start()
            calculation.wait_for_end()

            return calculation
        else:
            run.execute()

    def _call_expert_routine(self):
        """Call the mva expert"""
        try:
            check_output(["basf2_mva_expert",
                          "--identifiers", self.identifier_name, self.weight_data_location,
                          "--datafiles", self.test_file_name,
                          "--outputfile", self.expert_file_name,
                          "--treename", "records"])
        except CalledProcessError as e:
            raise RuntimeError(e.output)

    def _call_evaluation_routine(self):
        """Call the mva evaluation routine"""
        try:
            check_output(["basf2_mva_evaluate.py",
                          "--identifiers", self.identifier_name, self.weight_data_location,
                          "--train_datafiles", self.training_file_name,
                          "--datafiles", self.test_file_name,
                          "--treename", "records",
                          "--outputfile", self.evaluation_file_name],
                         stderr=STDOUT)
        except CalledProcessError as e:
            raise RuntimeError(e.output)
# @endcond
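
# ---------------------------------------------------------------------------
# Usage sketch (not executed on import): assuming a run class like the
# TestClass shown in the MVATeacherAndAnalyser docstring, which additionally
# provides the weight_data_location attribute used in __init__, a typical
# jupyter notebook session would roughly look like this:
#
#     teacher = MVATeacherAndAnalyser(TestClass)
#
#     # record candidates, split them into training/test samples and train the MVA
#     teacher.train()
#
#     # classifier output of the expert on the test sample, plus the evaluation PDF
#     df = teacher.evaluate_classification()
#
#     # run the tracking validation once per cut in TestClass.evaluation_cuts
#     # (plus the "truth" baseline) and collect the results under results/
#     calculations = teacher.evaluate_tracking()
# ---------------------------------------------------------------------------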