Belle II Software development
builtin_splot.py
1#!/usr/bin/env python3
2
3
10
11import basf2_mva
12
if __name__ == "__main__":
    from basf2 import conditions

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = ['localdb/database.txt']

    # ------------------------------------------------------------------
    # Feature list. It is assembled from thematic groups; the resulting
    # order is significant because 'M' (the sPlot discriminating variable)
    # has to be the first entry: the "ordinary" training below drops it
    # via variables[1:].
    # ------------------------------------------------------------------
    candidate_kinematics = ['M', 'p', 'pt', 'pz', 'phi']
    daughter_kinematics = [f'daughter({index}, {name})'
                           for index in range(3)
                           for name in ('p', 'pz', 'pt', 'phi')]
    candidate_vertex = ['chiProb', 'dr', 'dz', 'dphi']
    daughter_vertex = ['daughter(0, dr)', 'daughter(1, dr)',
                       'daughter(0, dz)', 'daughter(1, dz)',
                       'daughter(0, dphi)', 'daughter(1, dphi)']
    daughter_fit_quality = ['daughter(0, chiProb)', 'daughter(1, chiProb)',
                            'daughter(2, chiProb)', 'daughter(2, M)']
    daughter_atc_pid = ['daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))']
    daughter_angles = ['daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)']
    # Cluster quantities of the two daughters of daughter 2.
    granddaughter_cluster = [f'daughter(2, daughter({index}, {name}))'
                             for name in ('E', 'clusterLAT', 'clusterHighestE',
                                          'clusterNHits', 'clusterE9E25', 'minC2TDist')
                             for index in (0, 1)]

    variables = (
        candidate_kinematics
        + daughter_kinematics
        + candidate_vertex
        + daughter_vertex
        + daughter_fit_quality
        + daughter_atc_pid
        + daughter_angles
        + granddaughter_cluster
        + ['daughterInvM(1, 2)']
    )

    # ------------------------------------------------------------------
    # Reference trainings on MC, using the truth target "isSignal".
    # ------------------------------------------------------------------
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train_mc.root")
    general_options.m_treename = "tree"
    general_options.m_target_variable = "isSignal"

    # SPlot is more stable if one doesn't use the randRatio, hence it is set to 1.0.
    # FastBDT has a special sPlot mode, but it isn't implemented yet in the mva package.
    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_randRatio = 1.0

    # "MVAFull": every variable, including M.
    general_options.m_identifier = "MVAFull"
    general_options.m_variables = basf2_mva.vector(*variables)
    basf2_mva.teacher(general_options, fastbdt_options)

    # "MVAOrdinary": the same training without the first variable, M.
    general_options.m_identifier = "MVAOrdinary"
    general_options.m_variables = basf2_mva.vector(*variables[1:])
    basf2_mva.teacher(general_options, fastbdt_options)

    # ------------------------------------------------------------------
    # sPlot trainings.
    # SPlot training assumes that the datafile given to the general options
    # contains only data. It requires an additional file with MC information
    # from which it can extract the distribution of the discriminating
    # variable (in this case M).
    # Here the data are read from train_data.root and the MC information
    # from train_mc.root.
    # ------------------------------------------------------------------
    meta_options = basf2_mva.MetaOptions()
    meta_options.m_use_splot = True
    meta_options.m_splot_variable = "M"
    general_options.m_datafiles = basf2_mva.vector("train_data.root")
    meta_options.m_splot_mc_files = basf2_mva.vector("train_mc.root")

    # (identifier, m_splot_combined, m_splot_boosted), trained in this order:
    #   1. an ordinary sPlot training,
    #   2. sPlot combined with a PDF classifier for M, in one step,
    #   3. a boosted sPlot training,
    #   4. a boosted and combined training.
    splot_trainings = (
        ("MVASPlot", False, False),
        ("MVASPlotCombined", True, False),
        ("MVASPlotBoosted", False, True),
        ("MVASPlotCombinedBoosted", True, True),
    )
    for splot_identifier, use_combined, use_boosted in splot_trainings:
        general_options.m_identifier = splot_identifier
        meta_options.m_splot_combined = use_combined
        meta_options.m_splot_boosted = use_boosted
        basf2_mva.teacher(general_options, fastbdt_options, meta_options)

    # ------------------------------------------------------------------
    # Also do a training of only the pdf classifier, with M as sole input.
    # ------------------------------------------------------------------
    pdf_options = basf2_mva.PDFOptions()
    general_options.m_method = 'PDF'
    general_options.m_identifier = "MVAPdf"
    general_options.m_variables = basf2_mva.vector('M')
    basf2_mva.teacher(general_options, pdf_options)

    # ------------------------------------------------------------------
    # Apply all trained methods and write their output to expert.root.
    # NOTE(review): the trainings above read train_mc.root / train_data.root,
    # whereas the expert reads train.root -- confirm this is the intended file.
    # ------------------------------------------------------------------
    trained_identifiers = ['MVAPdf', 'MVAFull', 'MVAOrdinary']
    trained_identifiers += [entry[0] for entry in splot_trainings]
    basf2_mva.expert(basf2_mva.vector(*trained_identifiers),
                     basf2_mva.vector('train.root'), 'tree', 'expert.root')

    # The string literal below is never executed. It sketches how the trained
    # identifiers could be applied with MVAExpert modules inside a basf2 path;
    # the names b2, ma and vx it uses are not imported in this script.
    """
    path = b2.create_path()
    ma.inputMdstList('MC6', ['/storage/jbod/tkeck/MC6/evtgen-charged/sub00/mdst_0001*.root'], path=path)
    ma.fillParticleLists([('K-', 'kaonID > 0.5'), ('pi+', 'pionID > 0.5')], path=path)
    ma.reconstructDecay('D0 -> K- pi+', '1.8 < M < 1.9', path=path)
    vx.kFit('D0', 0.1, path=path)
    ma.applyCuts('D0', '1.8 < M < 1.9', path=path)
    ma.matchMCTruth('D0', path=path)

    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Pdf', identifier='MVAPdf')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Full', identifier='MVAFull')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Ordinary', identifier='MVAOrdinary')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlot', identifier='MVASPlot')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombined', identifier='MVASPlotCombined')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotBoosted', identifier='MVASPlotBoosted')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombinedBoosted', identifier='MVASPlotCombinedBoosted')
    ma.variablesToNtuple('D0', ['isSignal', 'extraInfo(Pdf)', 'extraInfo(Full)', 'extraInfo(Ordinary)', 'extraInfo(SPlot)',
                                'extraInfo(SPlotCombined)', 'extraInfo(SPlotBoosted)', 'extraInfo(SPlotCombinedBoosted)'], path=path)
    b2.process(path)
    """