15 if __name__ ==
"__main__":
16 from basf2
import conditions
18 conditions.testing_payloads = [
19 'localdb/database.txt'
43 'daughter(0, chiProb)',
44 'daughter(1, chiProb)',
45 'daughter(2, chiProb)',
46 'daughter(0, kaonID)',
47 'daughter(0, pionID)',
48 'daughterInvariantMass(0, 1)',
49 'daughterInvariantMass(0, 2)',
50 'daughterInvariantMass(1, 2)']
# Options handed to basf2_mva.teacher for every method in the comparison:
# training file, tree name, feature list and target branch.
general_options = basf2_mva.GeneralOptions()
general_options.m_treename = "tree"
general_options.m_datafiles = basf2_mva.vector("train.root")
general_options.m_variables = basf2_mva.vector(*variables)
general_options.m_target_variable = "isSignal"
# Initial identifier only; the comparison loop further down overwrites it
# with the label of the method currently being trained.
general_options.m_identifier = "MVADatabaseIdentifier"
# Default-constructed options for the entry the comparison loop labels
# "Trivial".
trivial_options = basf2_mva.TrivialOptions()

# FastBDT options with the tree count forced to zero. The comparison loop
# runs this entry under the label "DataLoading".
# NOTE(review): presumably this isolates the cost of reading the data from
# the cost of boosting -- confirm that FastBDT accepts zero trees.
data_options = basf2_mva.FastBDTOptions()
data_options.m_nTrees = 0
# FastBDT settings for the benchmark.
# The numeric values 100 / 0.2 / 0.5 / 3 also appear in the TMVA-BDT and
# sklearn configurations of this script (NTrees, Shrinkage / learning_rate,
# BaggedSampleFraction / subsample, MaxDepth / max_depth).
# NOTE(review): presumably chosen so the BDT implementations are compared on
# like-for-like settings -- confirm the parameter correspondence.
fastbdt_options = basf2_mva.FastBDTOptions()
fastbdt_options.m_nTrees = 100
fastbdt_options.m_nLevels = 3
fastbdt_options.m_nCuts = 10
fastbdt_options.m_shrinkage = 0.2
fastbdt_options.m_randRatio = 0.5
# FANN neural-network settings for the benchmark.
fann_options = basf2_mva.FANNOptions()
fann_options.m_number_of_threads = 1
fann_options.m_random_seeds = 1
fann_options.m_hidden_layers_architecture = "N+1"
fann_options.m_validation_fraction = 0.001
fann_options.m_max_epochs = 100
# Must come after m_max_epochs is set: the test rate is read back from it and
# placed one past the epoch count.
# NOTE(review): presumably this keeps the periodic test pass from ever
# triggering during training -- confirm against the FANNOptions docs.
fann_options.m_test_rate = fann_options.m_max_epochs + 1
# TMVA classification options configured as a gradient-boosted BDT
# (BoostType=Grad). Type and method are left at the class defaults.
tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
# Booking string: colon-separated TMVA options, assembled from two adjacent
# literals so each source line stays short.
tmva_bdt_options.m_config = (
    "!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
    "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining"
)
# Dataset preparation string: block split with explicit training counts and a
# single test event per class.
# NOTE(review): the counts 9691 / 136972 look tied to the contents of
# train.root -- confirm whenever the training sample changes.
tmva_bdt_options.m_prepareOptions = (
    "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
    "nTest_Signal=1:nTest_Background=1"
)
# TMVA classification options switched from the class defaults to the MLP
# (neural network) type and method.
tmva_nn_options = basf2_mva.TMVAOptionsClassification()
tmva_nn_options.m_method = "MLP"
tmva_nn_options.m_type = "MLP"
# Booking string for the MLP.
# NOTE(review): this starts with "H" whereas the TMVA-BDT config in this
# script uses "!H" -- confirm the difference is intentional.
tmva_nn_options.m_config = (
    "H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS"
)
# Same dataset preparation string as used for the TMVA-BDT entry: block split
# with explicit training counts and a single test event per class.
tmva_nn_options.m_prepareOptions = (
    "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
    "nTest_Signal=1:nTest_Background=1"
)
# Python-backend options selecting the "sklearn" framework.
sklearn_bdt_options = basf2_mva.PythonOptions()
sklearn_bdt_options.m_framework = "sklearn"
# JSON-formatted configuration string handed to the framework via m_config.
# The name `param` is kept because it is a script-level variable.
param = (
    '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, '
    '"random_state": 0, "subsample": 0.5}'
)
sklearn_bdt_options.m_config = param
# Python-backend options selecting the "xgboost" framework.
xgboost_options = basf2_mva.PythonOptions()
xgboost_options.m_framework = "xgboost"
# JSON-formatted configuration string handed to the framework via m_config.
# The two literals are joined without a space after the comma; that exact
# text is preserved.
# NOTE(review): "eta" (0.1) and "nTrees" (400) differ from the 0.2 / 100 used
# for the other BDT entries in this script -- confirm this is deliberate.
# NOTE(review): recent xgboost releases replaced "silent" with "verbosity" --
# confirm against the installed version.
param = (
    '{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
    '"subsample": 0.5, "nthread": 1, "nTrees": 400}'
)
xgboost_options.m_config = param
# File(s) the trained methods are applied to when timing inference.
test_data = ["validation.root"]

# Train every configured method in turn and record, per method, the
# wall-clock training time, the wall-clock inference time and `auc`.
for label, options in [
    ("DataLoading", data_options),
    ("FastBDT", fastbdt_options),
    ("FANN", fann_options),
    ("TMVA-BDT", tmva_bdt_options),
    ("TMVA-NN", tmva_nn_options),
    ("SKLearn-BDT", sklearn_bdt_options),
    ("XGBoost", xgboost_options),
    ("Trivial", trivial_options),
]:
    # Training: the label doubles as the identifier the weightfile is stored
    # under, so each method gets its own entry.
    training_start = time.time()
    general_options.m_identifier = label
    basf2_mva.teacher(general_options, options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Inference on the validation sample.
    # NOTE(review): `method` is not assigned anywhere in the visible lines --
    # confirm it is (re)created for the freshly trained identifier before
    # this call, outside the timed section.
    inference_start = time.time()
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    # NOTE(review): `auc` is not assigned in the visible lines either, and
    # `p` / `t` are otherwise unused here -- presumably `auc` is computed from
    # them; confirm. `stats` is likewise expected to exist before the loop.
    print(label, training_time, inference_time, auc)
    stats.append((label, training_time, inference_time, auc))
def calculate_roc_auc(p, t)