if __name__ == "__main__":
    from basf2 import conditions
    # Local import: the loop below needs an object exposing apply_expert()
    # and an AUC helper, neither of which is brought into scope anywhere in
    # this section.
    # NOTE(review): assumes basf2_mva_util ships alongside basf2_mva in this
    # release -- confirm.
    import basf2_mva_util

    # Register the local payload list as a testing payload source.
    conditions.testing_payloads = [
        'localdb/database.txt',
    ]

    # Input features handed to every method via general_options.m_variables.
    variables = [
        'M', 'p', 'pt', 'pz',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
        'chiProb', 'dr', 'dz',
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(0, kaonID)', 'daughter(0, pionID)',
        'daughterInvariantMass(0, 1)',
        'daughterInvariantMass(0, 2)',
        'daughterInvariantMass(1, 2)',
    ]

    # Options shared by all trainings. m_identifier is overwritten with the
    # per-method label inside the benchmark loop.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT with zero trees, benchmarked under the "DataLoading" label.
    # NOTE(review): presumably isolates the data-loading overhead -- confirm.
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    # Test rate is set one above the epoch count.
    # NOTE(review): presumably so the periodic test never triggers -- confirm.
    fann_options.m_test_rate = fann_options.m_max_epochs + 1
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = (
        "!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
        "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = (
        "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
        "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = (
        "H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = (
        "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
        "nTest_Signal=1:nTest_Background=1")

    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = (
        '{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
        '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # One (label, training_time, inference_time, auc) tuple per method.
    stats = []
    test_data = ["validation.root"]
    benchmarks = [
        ("DataLoading", data_options),
        ("FastBDT", fastbdt_options),
        ("FANN", fann_options),
        ("TMVA-BDT", tmva_bdt_options),
        ("TMVA-NN", tmva_nn_options),
        ("SKLearn-BDT", sklearn_bdt_options),
        ("XGBoost", xgboost_options),
        ("Trivial", trivial_options),
    ]
    for label, options in benchmarks:
        # Wall-clock time of the training step; the label doubles as the
        # identifier under which the trained method is stored.
        training_start = time.time()
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.time()
        training_time = training_stop - training_start

        # Load the freshly trained method outside the timed inference region.
        method = basf2_mva_util.Method(general_options.m_identifier)

        inference_start = time.time()
        p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
        inference_stop = time.time()
        inference_time = inference_stop - inference_start

        # NOTE(review): helper name as in recent basf2 releases; older
        # releases may expose it as calculate_roc_auc -- confirm against the
        # installed basf2_mva_util.
        auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)

        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))