Belle II Software  release-08-01-10
purity_transformation.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 import basf2_mva_util
13 import time
14 
if __name__ == "__main__":
    # Example script: train the same FastBDT configuration twice -- once as-is
    # and once with m_purityTransformation enabled -- and print, for each run,
    # the training time, the inference time and the AUC on the test sample.
    from basf2 import conditions, find_file

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # Input features handed to the classifier.
    variables = [
        'M',
        'p',
        'pt',
        'pz',
        'daughter(0, p)',
        'daughter(0, pz)',
        'daughter(0, pt)',
        'daughter(1, p)',
        'daughter(1, pz)',
        'daughter(1, pt)',
        'daughter(2, p)',
        'daughter(2, pz)',
        'daughter(2, pt)',
        'chiProb',
        'dr',
        'dz',
        'daughter(0, dr)',
        'daughter(1, dr)',
        'daughter(0, dz)',
        'daughter(1, dz)',
        'daughter(0, chiProb)',
        'daughter(1, chiProb)',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)',
        'daughter(0, pionID)',
        'daughterInvM(0, 1)',
        'daughterInvM(0, 2)',
        'daughterInvM(1, 2)']

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    # Placeholder only: the identifier is overwritten with the run label
    # inside the loop below before each training.
    general_options.m_identifier = "MVADatabaseIdentifier"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    # Baseline configuration.
    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 3
    fastbdt_options.m_shrinkage = 0.2
    fastbdt_options.m_randRatio = 0.5

    # Identical hyper-parameters, plus the purity transformation switch.
    fastbdt_pt_options = basf2_mva.FastBDTOptions()
    fastbdt_pt_options.m_nTrees = 100
    fastbdt_pt_options.m_nCuts = 10
    fastbdt_pt_options.m_nLevels = 3
    fastbdt_pt_options.m_shrinkage = 0.2
    fastbdt_pt_options.m_randRatio = 0.5
    fastbdt_pt_options.m_purityTransformation = True

    # One (label, training_time, inference_time, auc) tuple per configuration.
    stats = []
    for label, options in [("FastBDT", fastbdt_options), ("FastBDT_PT", fastbdt_pt_options)]:
        training_start = time.time()
        general_options.m_identifier = label
        basf2_mva.teacher(general_options, options)
        training_stop = time.time()
        training_time = training_stop - training_start

        method = basf2_mva_util.Method(general_options.m_identifier)
        inference_start = time.time()
        # presumably p holds the classifier outputs and t the true targets
        # of the test sample -- confirm against basf2_mva_util.Method.
        p, t = method.apply_expert(testing_data, general_options.m_treename)
        inference_stop = time.time()
        inference_time = inference_stop - inference_start

        # Without this assignment `auc` is undefined and the print below
        # raises NameError on the first iteration.
        # NOTE(review): the helper is assumed to live in basf2_mva_util -- confirm.
        auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
        print(label, training_time, inference_time, auc)
        stats.append((label, training_time, inference_time, auc))

    # Final summary, one line per configuration.
    for line in stats:
        print(*line)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)