Belle II Software  release-06-02-00
performance_comparison.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 import basf2_mva_util
13 import time
14 
if __name__ == "__main__":
    # Benchmark script: trains several MVA methods on the same dataset and
    # reports, per method, the training time, the inference time and the
    # ROC AUC obtained on a separate validation file.
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # Input features handed to every method.
    variables = [
        'M',
        'p',
        'pt',
        'pz',
        'daughter(0, p)',
        'daughter(0, pz)',
        'daughter(0, pt)',
        'daughter(1, p)',
        'daughter(1, pz)',
        'daughter(1, pt)',
        'daughter(2, p)',
        'daughter(2, pz)',
        'daughter(2, pt)',
        'chiProb',
        'dr',
        'dz',
        'daughter(0, dr)',
        'daughter(1, dr)',
        'daughter(0, dz)',
        'daughter(1, dz)',
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
        'daughterInvariantMass(0, 1)',
        'daughterInvariantMass(0, 2)',
        'daughterInvariantMass(1, 2)',
    ]

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    # Placeholder identifier; it is overwritten with the method label in the loop below.
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT configured with zero trees; used under the label "DataLoading" below.
    # NOTE(review): presumably this isolates the data-loading overhead -- confirm.
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    fann_options.m_test_rate = fann_options.m_max_epochs + 1  # Never test
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = ("!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
                                 "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                         "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = ("H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                        "nTest_Signal=1:nTest_Background=1")

    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = ('{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
             '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # One (label, training_time, inference_time, auc) tuple per method.
    stats = []
    test_data = ["validation.root"]
    for label, options in [("DataLoading", data_options), ("FastBDT", fastbdt_options), ("FANN", fann_options),
                           ("TMVA-BDT", tmva_bdt_options), ("TMVA-NN", tmva_nn_options),
                           ("SKLearn-BDT", sklearn_bdt_options), ("XGBoost", xgboost_options),
                           ("Trivial", trivial_options)]:
        # perf_counter() is monotonic, so the measured intervals are not
        # distorted by system clock adjustments during a long training.
        training_start = time.perf_counter()
        # Each method is stored under its own label as identifier.
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.perf_counter()
        training_time = training_stop - training_start

        method = basf2_mva_util.Method(general_options.m_identifier)
        inference_start = time.perf_counter()
        p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
        inference_stop = time.perf_counter()
        inference_time = inference_stop - inference_start

        # 'auc' must be computed before it is reported; without this
        # assignment the print below raises NameError.
        # NOTE(review): assumes calculate_roc_auc is provided by basf2_mva_util
        # and takes the (p, t) pair returned by apply_expert -- confirm.
        auc = basf2_mva_util.calculate_roc_auc(p, t)
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Summary table: one line per method.
    for line in stats:
        print(*line)
def calculate_roc_auc(p, t)