if __name__ == "__main__":
    # Benchmark several MVA back-ends on the same training and test samples.
    # Each configured method is trained via basf2_mva.teacher, then applied to
    # the test sample; the label, training time, inference time and AUC are
    # printed and collected in `stats`.

    # `find_file` is imported by name so the calls below do not depend on a
    # separate module-level `import basf2`.
    from basf2 import conditions, find_file

    conditions.testing_payloads = [
        'localdb/database.txt',
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # NOTE(review): only the entries visible in this excerpt are listed; the
    # original list may contain further variables -- confirm before relying
    # on the results.
    variables = [
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
    ]

    # Options shared by every method; m_identifier is overwritten per method
    # inside the benchmark loop.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    trivial_options = basf2_mva.TrivialOptions()

    # FastBDT configured with zero trees; it is benchmarked under the label
    # "DataLoading" below.
    data_options = basf2_mva.FastBDTOptions()
    data_options.m_nTrees = 0

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    fann_options = basf2_mva.FANNOptions()
    fann_options.m_number_of_threads = 1
    fann_options.m_max_epochs = 100
    fann_options.m_validation_fraction = 0.001
    # Test rate is set one past the epoch limit -- presumably so the periodic
    # test never fires during training (TODO confirm against FANN options).
    fann_options.m_test_rate = fann_options.m_max_epochs + 1
    fann_options.m_hidden_layers_architecture = "N+1"
    fann_options.m_random_seeds = 1

    tmva_bdt_options = basf2_mva.TMVAOptionsClassification()
    tmva_bdt_options.m_config = (
        "!H:!V:CreateMVAPdfs:NTrees=100:BoostType=Grad:Shrinkage=0.2:UseBaggedBoost:"
        "BaggedSampleFraction=0.5:nCuts=1024:MaxDepth=3:IgnoreNegWeightsInTraining")
    tmva_bdt_options.m_prepareOptions = (
        "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
        "nTest_Signal=1:nTest_Background=1")

    tmva_nn_options = basf2_mva.TMVAOptionsClassification()
    tmva_nn_options.m_type = "MLP"
    tmva_nn_options.m_method = "MLP"
    tmva_nn_options.m_config = (
        "H:!V:CreateMVAPdfs:VarTransform=N:NCycles=100:HiddenLayers=N+1:TrainingMethod=BFGS")
    tmva_nn_options.m_prepareOptions = (
        "SplitMode=block:V:nTrain_Signal=9691:nTrain_Background=136972:"
        "nTest_Signal=1:nTest_Background=1")

    # The Python-based back-ends receive their hyper-parameters as a JSON string.
    sklearn_bdt_options = basf2_mva.PythonOptions()
    sklearn_bdt_options.m_framework = "sklearn"
    param = '{"n_estimators": 100, "learning_rate": 0.2, "max_depth": 3, "random_state": 0, "subsample": 0.5}'
    sklearn_bdt_options.m_config = param

    xgboost_options = basf2_mva.PythonOptions()
    xgboost_options.m_framework = "xgboost"
    param = (
        '{"max_depth": 3, "eta": 0.1, "silent": 1, "objective": "binary:logistic",'
        '"subsample": 0.5, "nthread": 1, "nTrees": 400}')
    xgboost_options.m_config = param

    # (label, specific options) pairs, benchmarked in this order.
    benchmarks = [
        ("DataLoading", data_options),
        ("FastBDT", fastbdt_options),
        ("FANN", fann_options),
        ("TMVA-BDT", tmva_bdt_options),
        ("TMVA-NN", tmva_nn_options),
        ("SKLearn-BDT", sklearn_bdt_options),
        ("XGBoost", xgboost_options),
        ("Trivial", trivial_options),
    ]

    # Collected (label, training_time, inference_time, auc) tuples.
    stats = []

    for label, options in benchmarks:
        # The label doubles as the identifier under which the training is stored.
        training_start = time.time()
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.time()
        training_time = training_stop - training_start

        # NOTE(review): `method` is not assigned anywhere in the visible part
        # of this script; it has to be created for `label` before this point
        # (the statement appears to be missing from this excerpt).
        # NOTE(review): `testing_data` is already the result of
        # basf2_mva.vector(...); confirm that wrapping it again is intended.
        inference_start = time.time()
        p, t = method.apply_expert(basf2_mva.vector(testing_data), general_options.m_treename)
        inference_stop = time.time()
        inference_time = inference_stop - inference_start

        # NOTE(review): `auc` is not assigned in the visible part of this
        # script either; presumably it is computed from `p` and `t` -- confirm.
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)