development/doxygen/training__flipping__mvas_8py_source.html

#!/usr/bin/env python3


# Use training in a basf2 path

# Run basics/*.py before


import basf2_mva

import basf2_mva_util

import numpy as np

from basf2 import conditions, B2FATAL

import argparse


def get_argument_parser() -> argparse.ArgumentParser:
    """Build the command line parser of the flipping MVA training script.

    Only the parser is constructed here; calling ``parse_args`` is left to the caller.

    Registered options:
        -train: data file containing the ROOT TTree used during training (default ``''``).
        -data:  data file containing the ROOT TTree with independent test data (default ``''``).
        -tree:  name of the tree in the data file (default ``''``).
        -mva:   index of the MVA to be trained (default ``1``).
    """
    # One (flag, default, type, help) entry per option, listed in registration order.
    option_specs = (
        ("-train", "", str,
         "Data file containing ROOT TTree used during training. Default: ''."),
        ("-data", "", str,
         "Data file containing ROOT TTree with independent test data. Default: ''."),
        ("-tree", "", str,
         "Treename in data file. Default: ''."),
        ("-mva", 1, int,
         "index of mva to be trained. Default: 1"),
    )

    cli = argparse.ArgumentParser()
    for flag, fallback, caster, description in option_specs:
        cli.add_argument(flag, default=fallback, type=caster, help=description)
    return cli


def get_variables(index=1):
    """Return the names of the training variables for the requested flipping MVA.

    Args:
        index: selects the variable set; the values 1 and 2 are known.

    Returns:
        A newly created list of variable names. Any other index yields an empty list.
    """
    # input variables of the first MVA
    first_mva_inputs = (
        "seed_pz_estimate", "seed_pz_variance", "seed_z_estimate",
        "seed_tan_lambda_estimate", "seed_pt_estimate", "seed_x_estimate",
        "seed_y_estimate", "seed_py_variance", "seed_d0_estimate",
        "seed_omega_variance", "svd_layer6_clsTime", "seed_tan_lambda_variance",
        "seed_z_variance", "n_svd_hits", "n_cdc_hits",
        "svd_layer3_positionSigma", "first_cdc_layer", "last_cdc_layer",
        "InOutArmTimeDifference", "InOutArmTimeDifferenceError",
        "inGoingArmTime", "inGoingArmTimeError",
        "outGoingArmTime", "outGoingArmTimeError",
    )

    # input variables of the second MVA
    second_mva_inputs = (
        "flipped_pz_estimate", "tan_lambda_estimate", "d0_variance",
        "z_estimate", "px_variance", "p_value",
        "pt_estimate", "y_estimate", "d0_estimate",
        "x_estimate", "pz_variance", "omega_estimate",
        "px_estimate", "flipped_z_estimate", "py_estimate",
        "outGoingArmTime", "quality_flip_indicator", "inGoingArmTime",
    )

    chosen = ()
    if index == 1:
        chosen = first_mva_inputs
    if index == 2:
        chosen = second_mva_inputs

    # Hand out a list so that callers get a fresh, mutable object on every call.
    return list(chosen)


if __name__ == "__main__":
    # Script entry point: train the FastBDT of the selected flipping MVA on the
    # training sample, apply it to the independent test sample and print the
    # resulting figure of merit together with the rescaled variable importances.

    parser = get_argument_parser()
    args = parser.parse_args()

    # The options default to empty strings, so the required inputs are checked by hand.
    if args.train == '' or args.data == '':
        B2FATAL("Missing train or test samples. Terminating here.")

    if args.tree == '':
        B2FATAL("Missing Treename. Terminating here.")

    if args.mva not in [1, 2]:
        B2FATAL("MVA number must be either 1 or 2. Terminating here.")

    conditions.testing_payloads = ['localdb/database.txt']

    print(args.train)
    training_data = basf2_mva.vector(args.train)
    test_data = basf2_mva.vector(args.data)

    # get the variables
    variables = get_variables(args.mva)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    # The weight file is named `TRKTrackFlipAndRefit_MVA1_weightfile` for the first
    # MVA and `TRKTrackFlipAndRefit_MVA2_weightfile` for the second one.
    # NOTE(review): an earlier remark here stated that the evaluation scripts take
    # `Weightfile` as default name - confirm they are given this identifier.
    general_options.m_identifier = f"TRKTrackFlipAndRefit_MVA{args.mva}_weightfile"
    general_options.m_treename = args.tree
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "ismatched_WC"
    general_options.m_max_events = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    # settings shared by both MVAs
    fastbdt_options.m_randRatio = 0.5
    fastbdt_options.m_purityTransformation = False
    fastbdt_options.m_sPlot = False
    if args.mva == 1:
        # configurations for MVA1
        fastbdt_options.m_nTrees = 150
        fastbdt_options.m_nCuts = 18
        fastbdt_options.m_nLevels = 4
        fastbdt_options.m_shrinkage = 0.2
    elif args.mva == 2:
        # configurations for MVA2
        fastbdt_options.m_nTrees = 400
        fastbdt_options.m_nCuts = 25
        fastbdt_options.m_nLevels = 2
        fastbdt_options.m_shrinkage = 0.6

    basf2_mva.teacher(general_options, fastbdt_options)

    # Evaluate the freshly trained method on the independent test sample.
    m = basf2_mva_util.Method(general_options.m_identifier)
    p, t = m.apply_expert(test_data, general_options.m_treename)
    res = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)

    print(res)
    print("Variable importances returned by method")

    # Variables without an entry in the importance map count as 0.
    imp = np.array([m.importances.get(v, 0.0) for v in m.variables])

    # np.min/np.max raise on empty input, so only rescale when there is something to show.
    if imp.size:
        lowest = np.min(imp)
        width = np.max(imp) - lowest

        for var, value in zip(m.variables, imp):
            # Rescale linearly so that the least important variable is at 0 and the
            # most important one at 100. If all importances are equal the range is
            # zero; report 0 then instead of dividing by zero.
            scaled = (value - lowest) / width * 100 if width != 0 else 0.0
            print(var, scaled)

basf2_mva_util.calculate_auc_efficiency_vs_background_retention
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)
Definition: basf2_mva_util.py:81

basf2_mva_util.Method
Definition: basf2_mva_util.py:129