Belle II Software  release-05-01-25
builtin_splot.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2016
5 
6 import basf2_mva
7 from basf2 import *
8 from modularAnalysis import *
9 
10 
if __name__ == "__main__":
    # Example script: trains several MVA methods on the same feature set to
    # compare ordinary (MC-truth) trainings with the different sPlot training
    # modes offered by basf2_mva, plus a stand-alone PDF classifier.

    # Discriminating variable used by the sPlot trainings. It is also the
    # extra feature of the "MVAFull" training and the only feature of the
    # final PDF training.
    splot_variable = 'M'

    # Features used by every BDT training.
    # Kinematic variables of the daughters are deliberately not used, i.e.
    #   daughter(i, p), daughter(i, pz), daughter(i, pt), daughter(i, phi)  for i = 0, 1, 2
    #   daughterInvariantMass(1, 2), daughterInvariantMass(0, 1), daughterInvariantMass(0, 2)
    #   daughterAngle(0, 1), daughterAngle(0, 2), daughterAngle(1, 2)
    features = [
        'p', 'pt', 'pz', 'phi',
        'chiProb', 'dr', 'dz', 'dphi',
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(2, M)',
        'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterLAT))', 'daughter(2, daughter(1, clusterLAT))',
        'daughter(2, daughter(0, clusterHighestE))', 'daughter(2, daughter(1, clusterHighestE))',
        'daughter(2, daughter(0, clusterNHits))', 'daughter(2, daughter(1, clusterNHits))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2HDist))', 'daughter(2, daughter(1, minC2HDist))',
    ]
    # Full list: all features followed by the discriminating variable.
    variables = features + [splot_variable]

    # Settings shared by all trainings; identifier, variables and (later)
    # datafiles and method are overwritten step by step below.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train_mc.root")
    general_options.m_treename = "tree"
    general_options.m_target_variable = "isSignal"

    fastbdt_options = basf2_mva.FastBDTOptions()
    # sPlot is more stable if the randRatio is not used, so it is set to 1.0.
    # FastBDT has a special sPlot mode, but it is not implemented yet in the
    # mva package. m_nTrees is not overridden here.
    fastbdt_options.m_randRatio = 1.0

    # Two ordinary trainings on train_mc.root: one including the
    # discriminating variable as a feature and one without it.
    ordinary_trainings = (
        ("MVAFull", variables),
        ("MVAOrdinary", features),
    )
    for identifier, training_variables in ordinary_trainings:
        general_options.m_identifier = identifier
        general_options.m_variables = basf2_mva.vector(*training_variables)
        basf2_mva.teacher(general_options, fastbdt_options)

    # sPlot trainings.
    # An sPlot training assumes that the datafile given to the general options
    # contains only data. It requires an additional file with MC information
    # from which it can extract the distribution of the discriminating
    # variable (in this case M).
    # Here the data sample is train_data.root and the MC reference is
    # train_mc.root. The feature list set by the last ordinary training
    # (without M) stays in effect.
    meta_options = basf2_mva.MetaOptions()
    meta_options.m_use_splot = True
    meta_options.m_splot_variable = splot_variable
    general_options.m_datafiles = basf2_mva.vector("train_data.root")
    meta_options.m_splot_mc_files = basf2_mva.vector("train_mc.root")

    # (identifier, m_splot_combined, m_splot_boosted), in this order:
    #   1. an ordinary sPlot training
    #   2. sPlot training combined with a PDF classifier for M, in one step
    #   3. a boosted sPlot training
    #   4. a boosted and combined training
    splot_trainings = (
        ("MVASPlot", False, False),
        ("MVASPlotCombined", True, False),
        ("MVASPlotBoosted", False, True),
        ("MVASPlotCombinedBoosted", True, True),
    )
    for identifier, combined, boosted in splot_trainings:
        general_options.m_identifier = identifier
        meta_options.m_splot_combined = combined
        meta_options.m_splot_boosted = boosted
        basf2_mva.teacher(general_options, fastbdt_options, meta_options)

    # Finally train only the PDF classifier on the discriminating variable.
    # general_options still points at train_data.root at this point.
    pdf_options = basf2_mva.PDFOptions()
    general_options.m_method = 'PDF'
    general_options.m_identifier = "MVAPdf"
    general_options.m_variables = basf2_mva.vector(splot_variable)
    basf2_mva.teacher(general_options, pdf_options)