Belle II Software  release-08-01-10
builtin_splot.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 
if __name__ == "__main__":
    from basf2 import conditions

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = ['localdb/database.txt']

    # Input features for the trainings.
    # The order matters: 'M' must stay the first entry, because the
    # "MVAOrdinary" training below removes it with variables[1:].
    variables = [
        'M', 'p', 'pt', 'pz', 'phi',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
        'chiProb', 'dr', 'dz', 'dphi',
        'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)', 'daughter(2, M)',
        'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
        'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterLAT))', 'daughter(2, daughter(1, clusterLAT))',
        'daughter(2, daughter(0, clusterHighestE))', 'daughter(2, daughter(1, clusterHighestE))',
        'daughter(2, daughter(0, clusterNHits))', 'daughter(2, daughter(1, clusterNHits))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
        'daughterInvM(1, 2)',
    ]

    # Input samples used by the trainings below.
    mc_sample = "train_mc.root"
    data_sample = "train_data.root"

    # Reference training on the MC sample: all variables (including M),
    # with the truth variable isSignal as target.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector(mc_sample)
    general_options.m_identifier = "MVAFull"
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    # FastBDT configuration shared by every FastBDT training in this script.
    # SPlot is more stable if one doesn't use the randRatio.
    # FastBDT has a special sPlot mode, but it isn't implemented yet in the mva package.
    fastbdt_options = basf2_mva.FastBDTOptions()
    fastbdt_options.m_nTrees = 100
    fastbdt_options.m_randRatio = 1.0
    basf2_mva.teacher(general_options, fastbdt_options)

    # Second reference training on the same MC sample, this time without 'M'
    # (the first entry of the list), which is the sPlot variable used below.
    general_options.m_identifier = "MVAOrdinary"
    general_options.m_variables = basf2_mva.vector(*variables[1:])
    basf2_mva.teacher(general_options, fastbdt_options)

    # sPlot configuration.
    # SPlot training assumes that the datafile given to the general options contains only data.
    # It requires an additional file with MC information from which it can extract the
    # distribution of the discriminating variable (in this case M).
    # Here the data is read from train_data.root and the MC information from train_mc.root.
    # The variable list is still the one without 'M' set for "MVAOrdinary" above.
    meta_options = basf2_mva.MetaOptions()
    meta_options.m_use_splot = True
    meta_options.m_splot_variable = "M"
    general_options.m_datafiles = basf2_mva.vector(data_sample)
    meta_options.m_splot_mc_files = basf2_mva.vector(mc_sample)

    # One training per sPlot flavour: (identifier, combined, boosted).
    #  - ordinary sPlot training
    #  - sPlot training combined with a PDF classifier for M, in one step
    #  - boosted sPlot training
    #  - boosted and combined training
    splot_trainings = (
        ("MVASPlot", False, False),
        ("MVASPlotCombined", True, False),
        ("MVASPlotBoosted", False, True),
        ("MVASPlotCombinedBoosted", True, True),
    )
    for identifier, combined, boosted in splot_trainings:
        general_options.m_identifier = identifier
        meta_options.m_splot_combined = combined
        meta_options.m_splot_boosted = boosted
        basf2_mva.teacher(general_options, fastbdt_options, meta_options)

    # Also do a training of only the pdf classifier, using 'M' as the single variable.
    # general_options still points at train_data.root here and no meta options are passed.
    pdf_options = basf2_mva.PDFOptions()
    general_options.m_method = 'PDF'
    general_options.m_identifier = "MVAPdf"
    general_options.m_variables = basf2_mva.vector('M')
    basf2_mva.teacher(general_options, pdf_options)

    # Apply all trained methods and write the result to expert.root.
    # NOTE(review): the input here is 'train.root', which is neither of the files
    # used for the trainings above (train_mc.root / train_data.root) - confirm this is intended.
    trained_identifiers = basf2_mva.vector(
        'MVAPdf', 'MVAFull', 'MVAOrdinary', 'MVASPlot',
        'MVASPlotCombined', 'MVASPlotBoosted', 'MVASPlotCombinedBoosted')
    basf2_mva.expert(trained_identifiers, basf2_mva.vector('train.root'), 'tree', 'expert.root')

    # The string below is NOT executed: it is an illustrative sketch of how the trained
    # methods could be applied inside a basf2 path. The names b2, ma and vx are not
    # imported in the visible part of this script.
    """
    path = b2.create_path()
    ma.inputMdstList('MC6', ['/storage/jbod/tkeck/MC6/evtgen-charged/sub00/mdst_0001*.root'], path=path)
    ma.fillParticleLists([('K-', 'kaonID > 0.5'), ('pi+', 'pionID > 0.5')], path=path)
    ma.reconstructDecay('D0 -> K- pi+', '1.8 < M < 1.9', path=path)
    vx.kFit('D0', 0.1, path=path)
    ma.applyCuts('D0', '1.8 < M < 1.9', path=path)
    ma.matchMCTruth('D0', path=path)

    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Pdf', identifier='MVAPdf')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Full', identifier='MVAFull')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Ordinary', identifier='MVAOrdinary')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlot', identifier='MVASPlot')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombined', identifier='MVASPlotCombined')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotBoosted', identifier='MVASPlotBoosted')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombinedBoosted', identifier='MVASPlotCombinedBoosted')
    ma.variablesToNtuple('D0', ['isSignal', 'extraInfo(Pdf)', 'extraInfo(Full)', 'extraInfo(Ordinary)', 'extraInfo(SPlot)',
                                'extraInfo(SPlotCombined)', 'extraInfo(SPlotBoosted)', 'extraInfo(SPlotCombinedBoosted)'], path=path)
    b2.process(path)
    """