Belle II Software  release-08-01-10
performance_comparison.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2
12 import basf2_mva
13 import basf2_mva_util
14 import time
15 
if __name__ == "__main__":
    # Benchmark several MVA back-ends on the same D0 -> K pi pi example sample:
    # each method is trained with basf2_mva.teacher, applied to the test file,
    # and its training time, inference time and AUC are recorded and printed.

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    basf2.conditions.testing_payloads = [
        'localdb/database.txt',
    ]

    train_file = basf2.find_file("mva/train_D0toKpipi.root", "examples")
    test_file = basf2.find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # Input features handed to every method.
    variables = [
        'M',
        'p',
        'pt',
        'pz',
        'daughter(0, p)',
        'daughter(0, pz)',
        'daughter(0, pt)',
        'daughter(1, p)',
        'daughter(1, pz)',
        'daughter(1, pt)',
        'daughter(2, p)',
        'daughter(2, pz)',
        'daughter(2, pt)',
        'chiProb',
        'dr',
        'dz',
        'daughter(0, dr)',
        'daughter(1, dr)',
        'daughter(0, dz)',
        'daughter(1, dz)',
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
        'daughterInvM(0, 1)',
        'daughterInvM(0, 2)',
        'daughterInvM(1, 2)',
    ]

    # Options shared by all trainings; m_identifier is overwritten per method
    # inside the benchmark loop below.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT configured with zero trees; labelled "DataLoading" below
    # (presumably a baseline for the pure data-loading overhead).
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    fann_options.m_test_rate = fann_options.m_max_epochs + 1  # Never test
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = ("!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
                                 "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                         "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = ("H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                        "nTest_Signal=1:nTest_Background=1")

    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = ('{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
             '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # (label, specific options) pairs, benchmarked in this order.
    benchmarks = [
        ("DataLoading", data_options),
        ("FastBDT", fastbdt_options),
        ("FANN", fann_options),
        ("TMVA-BDT", tmva_bdt_options),
        ("TMVA-NN", tmva_nn_options),
        ("SKLearn-BDT", sklearn_bdt_options),
        ("XGBoost", xgboost_options),
        ("Trivial", trivial_options),
    ]

    stats = []
    for label, options in benchmarks:
        # The label doubles as the identifier under which the weightfile is stored.
        general_options.m_identifier = label

        # perf_counter is monotonic, so the measured durations cannot be
        # distorted by system clock adjustments during a long training.
        training_start = time.perf_counter()
        basf2_mva.teacher(general_options, options)
        training_time = time.perf_counter() - training_start

        # Loading the trained method is deliberately kept outside both timed regions.
        method = basf2_mva_util.Method(general_options.m_identifier)

        inference_start = time.perf_counter()
        p, t = method.apply_expert(basf2_mva.vector(testing_data), general_options.m_treename)
        inference_time = time.perf_counter() - inference_start

        # Without this assignment `auc` is undefined and the print below raises NameError.
        # NOTE(review): assumes the helper lives in basf2_mva_util - confirm.
        auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)

        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Final summary: one line per method with label, training time, inference time, AUC.
    for line in stats:
        print(*line)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)