Belle II Software  release-05-01-25
builtin_splot.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2016
5 
6 import basf2_mva
7 from basf2 import *
8 from modularAnalysis import *
9 
10 
if __name__ == "__main__":
    # Example script: trains several MVA methods (plain FastBDT, four sPlot
    # flavours and a PDF classifier) with basf2_mva and then applies all of
    # them in a single expert call.
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = ['localdb/database.txt']

    # Input variables. 'M' is deliberately the first entry: it is sliced off
    # for the "MVAOrdinary" training and used as the sPlot variable below.
    feature_names = [
        'M', 'p', 'pt', 'pz', 'phi',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
        'chiProb', 'dr', 'dz', 'dphi',
        'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)', 'daughter(2, M)',
        'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
        'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterLAT))', 'daughter(2, daughter(1, clusterLAT))',
        'daughter(2, daughter(0, clusterHighestE))', 'daughter(2, daughter(1, clusterHighestE))',
        'daughter(2, daughter(0, clusterNHits))', 'daughter(2, daughter(1, clusterNHits))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2HDist))', 'daughter(2, daughter(1, minC2HDist))',
        'daughterInvariantMass(1, 2)',
    ]

    # --- Reference training on the MC file with every variable (including M) ---
    general_opts = basf2_mva.GeneralOptions()
    general_opts.m_datafiles = basf2_mva.vector("train_mc.root")
    general_opts.m_identifier = "MVAFull"
    general_opts.m_treename = "tree"
    general_opts.m_variables = basf2_mva.vector(*feature_names)
    general_opts.m_target_variable = "isSignal"

    fastbdt_opts = basf2_mva.FastBDTOptions()
    # SPlot is more stable if one doesn't use the randRatio.
    # FastBDT has a special sPlot mode, but it isn't implemented yet in the mva package.
    fastbdt_opts.m_nTrees = 100
    fastbdt_opts.m_randRatio = 1.0
    basf2_mva.teacher(general_opts, fastbdt_opts)

    # --- Same training again, but without the first variable ('M') ---
    general_opts.m_identifier = "MVAOrdinary"
    general_opts.m_variables = basf2_mva.vector(*feature_names[1:])
    basf2_mva.teacher(general_opts, fastbdt_opts)

    # --- sPlot trainings, using M as the sPlot variable ---
    splot_opts = basf2_mva.MetaOptions()
    splot_opts.m_use_splot = True
    splot_opts.m_splot_variable = "M"
    # SPlot training assumes that the datafile given to the general options contains only data.
    # It requires an additional file with MC information from which it can extract the distribution
    # of the discriminating variable (in this case M).
    # Here the data file is train_data.root and the MC file is train_mc.root.
    general_opts.m_datafiles = basf2_mva.vector("train_data.root")
    splot_opts.m_splot_mc_files = basf2_mva.vector("train_mc.root")

    # One row per sPlot flavour: (identifier, m_splot_combined, m_splot_boosted).
    splot_flavours = (
        # ordinary sPlot training
        ("MVASPlot", False, False),
        # sPlot training combined with a PDF classifier for M, in one step
        ("MVASPlotCombined", True, False),
        # boosted sPlot training
        ("MVASPlotBoosted", False, True),
        # boosted and combined training
        ("MVASPlotCombinedBoosted", True, True),
    )
    for splot_identifier, use_combined, use_boosted in splot_flavours:
        general_opts.m_identifier = splot_identifier
        splot_opts.m_splot_combined = use_combined
        splot_opts.m_splot_boosted = use_boosted
        basf2_mva.teacher(general_opts, fastbdt_opts, splot_opts)

    # --- Training of only the PDF classifier, with M as its single variable ---
    # general_opts.m_datafiles still points at train_data.root at this stage.
    pdf_opts = basf2_mva.PDFOptions()
    general_opts.m_method = 'PDF'
    general_opts.m_identifier = "MVAPdf"
    general_opts.m_variables = basf2_mva.vector('M')
    basf2_mva.teacher(general_opts, pdf_opts)

    # --- Apply all trained methods and write the result to expert.root ---
    # NOTE(review): the expert reads 'train.root', which is neither of the two
    # files used for the trainings above (train_mc.root / train_data.root) - confirm.
    trained_identifiers = basf2_mva.vector(
        'MVAPdf', 'MVAFull', 'MVAOrdinary', 'MVASPlot',
        'MVASPlotCombined', 'MVASPlotBoosted', 'MVASPlotCombinedBoosted')
    expert_inputs = basf2_mva.vector('train.root')
    basf2_mva.expert(trained_identifiers, expert_inputs, 'tree', 'expert.root')

    # The string literal below is never executed; it holds an example basf2
    # path that uses the MVAExpert module with the identifiers trained above.
    """
    path = create_path()
    inputMdstList('MC6', ['/storage/jbod/tkeck/MC6/evtgen-charged/sub00/mdst_0001*.root'], path=path)
    fillParticleLists([('K-', 'kaonID > 0.5'), ('pi+', 'pionID > 0.5')], path=path)
    reconstructDecay('D0 -> K- pi+', '1.8 < M < 1.9', path=path)
    KFit('D0', 0.1, path=path)
    applyCuts('D0', '1.8 < M < 1.9', path=path)
    matchMCTruth('D0', path=path)

    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Pdf', identifier='MVAPdf')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Full', identifier='MVAFull')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='Ordinary', identifier='MVAOrdinary')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlot', identifier='MVASPlot')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombined', identifier='MVASPlotCombined')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotBoosted', identifier='MVASPlotBoosted')
    path.add_module('MVAExpert', listNames=['D0'], extraInfoName='SPlotCombinedBoosted', identifier='MVASPlotCombinedBoosted')
    variablesToNtuple('D0', ['isSignal', 'extraInfo(Pdf)', 'extraInfo(Full)', 'extraInfo(Ordinary)', 'extraInfo(SPlot)',
                             'extraInfo(SPlotCombined)', 'extraInfo(SPlotBoosted)', 'extraInfo(SPlotCombinedBoosted)'], path=path)
    process(path)
    """