Belle II Software  release-08-01-10
builtin_splot.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 
13 if __name__ == "__main__":
14  variables = ['p', 'pt', 'pz', 'phi',
15  'chiProb', 'dr', 'dz', 'dphi',
16  'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
17  'daughter(0, dphi)', 'daughter(1, dphi)',
18  'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)', 'daughter(2, M)',
19  'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
20  'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
21  'daughter(2, daughter(0, clusterLAT))', 'daughter(2, daughter(1, clusterLAT))',
22  'daughter(2, daughter(0, clusterHighestE))', 'daughter(2, daughter(1, clusterHighestE))',
23  'daughter(2, daughter(0, clusterNHits))', 'daughter(2, daughter(1, clusterNHits))',
24  'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
25  'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
26  # We do not use kinematic variables of the daughters
27  # 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
28  # 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
29  # 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
30  # 'daughterInvM(1, 2)', 'daughterInvM(0, 1)', 'daughterInvM(0, 2)',
31  # 'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
32  'M',
33  ]
34 
35  # Perform an sPlot training
36  general_options = basf2_mva.GeneralOptions()
37  general_options.m_datafiles = basf2_mva.vector("train_mc.root")
38  general_options.m_identifier = "MVAFull"
39  general_options.m_treename = "tree"
40  general_options.m_variables = basf2_mva.vector(*variables)
41  general_options.m_target_variable = "isSignal"
42 
43  fastbdt_options = basf2_mva.FastBDTOptions()
44  # SPlot is more stable if one doesn't use the randRatio
45  # FastBDT has a special sPlot mode, but which isn't implemented yet in the mva package
46  # fastbdt_options.m_nTrees = 100
47  fastbdt_options.m_randRatio = 1.0
48  basf2_mva.teacher(general_options, fastbdt_options)
49 
50  general_options.m_identifier = "MVAOrdinary"
51  general_options.m_variables = basf2_mva.vector(*variables[:-1])
52  basf2_mva.teacher(general_options, fastbdt_options)
53 
54  meta_options = basf2_mva.MetaOptions()
55  meta_options.m_use_splot = True
56  meta_options.m_splot_variable = "M"
57  # SPlot training assumes that the datafile given to the general options contains only data
58  # It requires an additional file with MC information from which it can extract the distribution
59  # of the discriminating variable (in this case M).
60  # Here we use the same file
61  general_options.m_datafiles = basf2_mva.vector("train_data.root")
62  meta_options.m_splot_mc_files = basf2_mva.vector("train_mc.root")
63 
64  # First we do an ordinary sPlot training
65  general_options.m_identifier = "MVASPlot"
66  meta_options.m_splot_combined = False
67  meta_options.m_splot_boosted = False
68  basf2_mva.teacher(general_options, fastbdt_options, meta_options)
69 
70  # Now we combine the sPlot training with a PDF classifier for M, in one step
71  general_options.m_identifier = "MVASPlotCombined"
72  meta_options.m_splot_combined = True
73  meta_options.m_splot_boosted = False
74  basf2_mva.teacher(general_options, fastbdt_options, meta_options)
75 
76  # Now we use a boosted sPlot training
77  general_options.m_identifier = "MVASPlotBoosted"
78  meta_options.m_splot_combined = False
79  meta_options.m_splot_boosted = True
80  basf2_mva.teacher(general_options, fastbdt_options, meta_options)
81 
82  # And finally a boosted and combined training
83  general_options.m_identifier = "MVASPlotCombinedBoosted"
84  meta_options.m_splot_combined = True
85  meta_options.m_splot_boosted = True
86  basf2_mva.teacher(general_options, fastbdt_options, meta_options)
87 
88  # Also do a training of only the pdf classifier
89  pdf_options = basf2_mva.PDFOptions()
90  general_options.m_method = 'PDF'
91  general_options.m_identifier = "MVAPdf"
92  general_options.m_variables = basf2_mva.vector('M')
93  basf2_mva.teacher(general_options, pdf_options)