Belle II Software  release-08-01-10
fastbdt_ugboost.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 
if __name__ == "__main__":
    from basf2 import conditions, find_file

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = ['localdb/database.txt']

    # Feature groups shared by the two training configurations below.
    # The order of the assembled lists is kept exactly as in the full lists.
    candidate_kinematics = ['p', 'pt', 'pz', 'phi']
    candidate_vertex = ['chiProb', 'dr', 'dz', 'dphi']
    daughter_kinematics = [
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
    ]
    daughter_vertex = [
        'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)',
    ]
    third_daughter_fit = ['daughter(2, chiProb)']
    daughter_pid = [
        'daughter(0, kaonID)', 'daughter(0, pionID)',
        'daughter(1, kaonID)', 'daughter(1, pionID)',
    ]
    daughter_angles = ['daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)']
    granddaughter_cluster = [
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
    ]

    # Full feature set used by the first two trainings.
    full_feature_set = (
        candidate_kinematics
        + daughter_kinematics
        + candidate_vertex
        + daughter_vertex
        + third_daughter_fit
        + daughter_pid
        + daughter_angles
        + granddaughter_cluster
        + ['M']
    )

    # Reduced feature set: no per-daughter kinematics, no daughter 0/1 vertex
    # quantities, no daughter angles and no 'M'.
    reduced_feature_set = (
        candidate_kinematics
        + candidate_vertex
        + third_daughter_fit
        + daughter_pid
        + granddaughter_cluster
    )

    input_files = basf2_mva.vector(find_file("mva/train_D0toKpipi.root", "examples"))

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = input_files
    general_options.m_treename = "tree"
    # Spectators are the variables for which the selection should be uniform
    general_options.m_spectators = basf2_mva.vector('daughterInvM(0, 1)', 'daughterInvM(0, 2)')
    general_options.m_target_variable = "isSignal"

    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_nCuts = 10
    fastbdt_options.m_nLevels = 5
    fastbdt_options.m_shrinkage = 0.1
    fastbdt_options.m_randRatio = 0.5

    # One row per training: (weightfile identifier, flatness loss, input features).
    # NOTE(review): the negative flatness loss presumably switches the
    # uniformity term off for the two comparison trainings -- confirm against
    # the FastBDT documentation.
    trainings = (
        ("fastbdt", 10.0, full_feature_set),
        ("fastbdt_baseline", -1.0, full_feature_set),
        ("fastbdt_drop_feature", -1.0, reduced_feature_set),
    )
    for identifier, flatness_loss, features in trainings:
        general_options.m_identifier = identifier
        general_options.m_variables = basf2_mva.vector(*features)
        fastbdt_options.m_flatnessLoss = flatness_loss
        basf2_mva.teacher(general_options, fastbdt_options)