Belle II Software development
purity_transformation.py
#!/usr/bin/env python3

"""Compare a plain FastBDT with a FastBDT that uses the purity transformation.

Two FastBDT methods are trained on the same training file with identical
hyper-parameters; the second one additionally sets ``m_purityTransformation``.
Each trained method is applied to the test file, and the training time,
inference time and AUC of both methods are printed.
"""

import basf2_mva
import basf2_mva_util
import time

if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # Input features handed to both classifiers.
    variables = [
        'M',
        'p',
        'pt',
        'pz',
        'daughter(0, p)',
        'daughter(0, pz)',
        'daughter(0, pt)',
        'daughter(1, p)',
        'daughter(1, pz)',
        'daughter(1, pt)',
        'daughter(2, p)',
        'daughter(2, pz)',
        'daughter(2, pt)',
        'chiProb',
        'dr',
        'dz',
        'daughter(0, dr)',
        'daughter(1, dr)',
        'daughter(0, dz)',
        'daughter(1, dz)',
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
        'daughterInvM(0, 1)',
        'daughterInvM(0, 2)',
        'daughterInvM(1, 2)',
    ]

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    # Placeholder only: the identifier is overwritten with the method label
    # inside the training loop below before it is ever used.
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    # Baseline configuration.
    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    # Same hyper-parameters as the baseline, plus the purity transformation,
    # so that any difference in the results is caused by that flag alone.
    fastbdt_pt_options = basf2_mva.FastBDTOptions()
    fastbdt_pt_options.m_nTrees = 100
    fastbdt_pt_options.m_nCuts = 10
    fastbdt_pt_options.m_nLevels = 3
    fastbdt_pt_options.m_shrinkage = 0.2
    fastbdt_pt_options.m_randRatio = 0.5
    fastbdt_pt_options.m_purityTransformation = True

    # One (label, training_time, inference_time, auc) tuple per method.
    stats = []
    for label, options in [("FastBDT", fastbdt_options), ("FastBDT_PT", fastbdt_pt_options)]:
        # perf_counter() is used for the intervals because it is not
        # affected by wall-clock adjustments.
        training_start = time.perf_counter()
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.perf_counter()
        training_time = training_stop - training_start

        method = basf2_mva_util.Method(general_options.m_identifier)
        inference_start = time.perf_counter()
        # presumably p holds the classifier outputs and t the true targets
        # for the test sample -- confirm against basf2_mva_util.Method
        p, t = method.apply_expert(testing_data, general_options.m_treename)
        inference_stop = time.perf_counter()
        inference_time = inference_stop - inference_start

        # The AUC has to be computed here; without this assignment the
        # print/append below fail with a NameError.
        auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Summary of all methods once every training has finished.
    for line in stats:
        print(*line)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)