# Belle II Software development
# performance_comparison.py
#!/usr/bin/env python3

"""Compare training time, inference time and AUC of several MVA methods.

Every method configured below is trained on the same training file via
``basf2_mva.teacher`` and then applied to the same test file.  The wall-clock
time of both steps and the resulting AUC are printed after each method and
once more as a summary when all methods have run.
"""

import basf2
import basf2_mva
import basf2_mva_util
import time

if __name__ == "__main__":
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]
    train_file = basf2.find_file("mva/train_D0toKpipi.root", "examples")
    test_file = basf2.find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # Input features handed to every method.
    variables = [
        'M',
        'p',
        'pt',
        'pz',
        'daughter(0, p)',
        'daughter(0, pz)',
        'daughter(0, pt)',
        'daughter(1, p)',
        'daughter(1, pz)',
        'daughter(1, pt)',
        'daughter(2, p)',
        'daughter(2, pz)',
        'daughter(2, pt)',
        'chiProb',
        'dr',
        'dz',
        'daughter(0, dr)',
        'daughter(1, dr)',
        'daughter(0, dz)',
        'daughter(1, dz)',
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
        'daughterInvM(0, 1)',
        'daughterInvM(0, 2)',
        'daughterInvM(1, 2)']

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    # Placeholder only: the identifier is overwritten with the method label
    # inside the benchmark loop below.
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT with zero trees, benchmarked under the label "DataLoading";
    # presumably this isolates the data-loading overhead -- confirm.
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    fann_options.m_test_rate = fann_options.m_max_epochs + 1  # Never test
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = ("!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
                                 "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                         "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = ("H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                        "nTest_Signal=1:nTest_Background=1")

    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = ('{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
             '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # One (label, training_time, inference_time, auc) tuple per method.
    stats = []
    for label, options in [("DataLoading", data_options), ("FastBDT", fastbdt_options), ("FANN", fann_options),
                           ("TMVA-BDT", tmva_bdt_options), ("TMVA-NN", tmva_nn_options),
                           ("SKLearn-BDT", sklearn_bdt_options), ("XGBoost", xgboost_options), ("Trivial", trivial_options)]:
        training_start = time.time()
        # Store each trained method under its own label.
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.time()
        training_time = training_stop - training_start
        method = basf2_mva_util.Method(general_options.m_identifier)
        inference_start = time.time()
        # NOTE(review): testing_data is already the result of basf2_mva.vector();
        # confirm that wrapping it a second time is what apply_expert expects.
        p, t = method.apply_expert(basf2_mva.vector(testing_data), general_options.m_treename)
        inference_stop = time.time()
        inference_time = inference_stop - inference_start
        # This statement was missing from the listing, leaving `auc` undefined
        # (NameError) in the two lines below.
        auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Summary: repeat all collected results in one place.
    for line in stats:
        print(*line)
# Signature of the AUC helper (presumably provided by basf2_mva_util -- confirm):
#   def calculate_auc_efficiency_vs_background_retention(p, t, w=None)