Belle II Software  release-05-01-25
performance_comparison.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2016
5 
6 import basf2_mva
7 import basf2_mva_util
8 import time
9 
if __name__ == "__main__":
    # Benchmark script: trains several MVA methods on the same dataset via
    # basf2_mva.teacher, applies each one to a validation file and reports
    # training time, inference time and ROC AUC per method.
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # Feature list shared by every method in the comparison.
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)']

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    # Placeholder identifier; it is overwritten with the method label inside the loop below.
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT configured with zero trees, reported under the label "DataLoading".
    # NOTE(review): presumably serves as a baseline for the data-loading overhead - confirm.
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    fann_options.m_test_rate = fann_options.m_max_epochs + 1  # Never test
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = ("!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
                                 "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                         "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = ("H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = ("SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
                                        "nTest_Signal=1:nTest_Background=1")

    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = ('{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
             '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # (label, specific options) pairs; the label doubles as the identifier
    # under which the trained method is stored and later loaded.
    methods = [
        ("DataLoading", data_options),
        ("FastBDT", fastbdt_options),
        ("FANN", fann_options),
        ("TMVA-BDT", tmva_bdt_options),
        ("TMVA-NN", tmva_nn_options),
        ("SKLearn-BDT", sklearn_bdt_options),
        ("XGBoost", xgboost_options),
        ("Trivial", trivial_options),
    ]

    stats = []
    test_data = ["validation.root"]
    for label, options in methods:
        # perf_counter is monotonic, so the measured intervals are not
        # distorted by system clock adjustments during a long training.
        training_start = time.perf_counter()
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_time = time.perf_counter() - training_start

        method = basf2_mva_util.Method(general_options.m_identifier)
        inference_start = time.perf_counter()
        p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
        inference_time = time.perf_counter() - inference_start

        # The AUC is computed outside the timed sections so it does not
        # count towards the inference time.
        auc = basf2_mva_util.calculate_roc_auc(p, t)
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Final summary: one line per method with label, training time, inference time and AUC.
    for row in stats:
        print(*row)
basf2_mva_util.calculate_roc_auc
def calculate_roc_auc(p, t)
Definition: basf2_mva_util.py:39
basf2_mva_util.Method
Definition: basf2_mva_util.py:81