Belle II Software development
hep_ml_uboost.py
#!/usr/bin/env python3

# Example steering script: trains a hep_ml uBoost classifier through the
# basf2 MVA package, applies it to a test sample, prints the training time,
# inference time and AUC, and finally runs the basf2_mva_evaluate.py script.

import json
import subprocess
import time

import basf2_mva
import basf2_mva_util

if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # Feature variables handed to the classifier.
    variables = ['p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']

    # Locate the example training and testing samples.
    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    # Spectators are the variables for which the selection should be uniform
    general_options.m_spectators = basf2_mva.vector('M')
    general_options.m_target_variable = "isSignal"
    general_options.m_identifier = "HepMLUBoost"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_steering_file = 'mva/examples/python/hep_ml_uboost.py'
    # Set the parameters of the uBoostClassifier,
    # defaults are 50, which is reasonable, but I want to have a example runtime < 2 minutes
    specific_options.m_config = json.dumps({'n_neighbors': 5, 'n_estimators': 5})
    specific_options.m_framework = 'hep_ml'

    # Train the method and measure the wall-clock time it takes.
    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Apply the trained method to the test sample and time the inference.
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    # The AUC must be computed from the expert output before it is printed;
    # without this assignment the print below raises a NameError.
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("HepML", training_time, inference_time, auc)

    # Pass the arguments as a list (no shell) so that file paths containing
    # spaces or shell metacharacters are forwarded to the script unchanged.
    subprocess.call([
        'basf2_mva_evaluate.py',
        '-c',
        '-o', 'latex.pdf',
        '-train', train_file,
        '-data', test_file,
        '-i', 'HepMLUBoost',
    ])
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)