Belle II Software development
ChargedPidMVAModule.py
1#!/usr/bin/env python3
2
3
10
11"""
12This steering file fills an NTuple with the ChargedPidMVA score
13for charged particle identification. By default, global PID info is stored,
14meaning one signal hypothesis is tested against all others.
15Optionally, binary PID can be stored, by testing one (or more) pair of (S,B) mass hypotheses.
16
17Usage:
18
19basf2 [-i /PATH/TO/MDST/FILE.root] analysis/examples/PostMdstIdentification/ChargedPidMVAModule.py -- [OPTIONS]
20
21Input: *_mdst_*.root
22Output: *_ntup_*.root
23
24"""
25
26import argparse
27import re
28from modularAnalysis import getAnalysisGlobaltag
29
30
31def argparser():
32
33 parser = argparse.ArgumentParser(description=__doc__,
34 formatter_class=argparse.RawTextHelpFormatter)
35
36 def sb_pair(arg):
37 try:
38 s, b = map(int, arg.split(','))
39 return s, b
40 except BaseException:
41 raise argparse.ArgumentTypeError("Option string must be of the form 'S,B'")
42
43 parser.add_argument("--matchTruth",
44 action="store_true",
45 default=False,
46 help="Apply truth-matching on particles.")
47 parser.add_argument("--testHyposPDGCodePair",
48 type=sb_pair,
49 nargs='+',
50 default=(0, 0),
51 help="Option required in binary mode.\n"
52 "A list of pdgId pairs of the (S, B) charged stable particle mass hypotheses to test.\n"
53 "Pass a space-separated list of (>= 1) S,B pdgIds, e.g.:\n"
54 "'--testHyposPDGCodePair 11,211 13,211'")
55 parser.add_argument("--addECLOnly",
56 dest="add_ecl_only",
57 action="store_true",
58 default=False,
59 help="Apply the BDT also for the ECL-only training."
60 "This will result in a separate score branch in the ntuple.")
61 parser.add_argument("--chargeIndependent",
62 action="store_true",
63 default=False,
64 help="Use a BDT trained on a sample of inclusively charged particles.")
65 parser.add_argument("--global_tag_append",
66 type=str,
67 nargs="+",
68 default=[getAnalysisGlobaltag()],
69 help="List of names of conditions DB global tag(s) to append on top of GT replay.\n"
70 "NB: these GTs will have lowest priority over GT replay.\n"
71 "The order of the sequence passed determines the priority of the GTs, w/ the highest coming first.\n"
72 "Pass a space-separated list of names.")
73 parser.add_argument("--global_tag_prepend",
74 type=str,
75 nargs="+",
76 default=None,
77 help="List of names of conditions DB global tag(s) to prepend to GT replay.\n"
78 "NB: these GTs will have highest priority over GT replay.\n"
79 "The order of the sequence passed determines the priority of the GTs, w/ the highest coming first.\n"
80 "Pass a space-separated list of names.")
81 parser.add_argument("--append_testing_payloads",
82 type=str,
83 default=None,
84 help="Path to a text file with local test payloads.\n"
85 "NB: these will have higher priority than any payload in the GT(s).\n"
86 "Use ONLY for testing.")
87 parser.add_argument("-d", "--debug",
88 dest="debug",
89 action="store",
90 default=0,
91 type=int,
92 choices=list(range(11, 20)),
93 help="Run the ChargedPidMVA module in debug mode. Pass the desired DEBUG level integer.")
94
95 return parser
96
97
98if __name__ == '__main__':
99
100 args = argparser().parse_args()
101
102 import basf2
103 import modularAnalysis as ma
104 from ROOT import Belle2
105 import pdg
106
107 for tag in args.global_tag_append:
108 basf2.conditions.append_globaltag(tag)
109 print(f"Appending GTs:\n{args.global_tag_append}")
110
111 if args.global_tag_prepend:
112 for tag in reversed(args.global_tag_prepend):
113 basf2.conditions.prepend_globaltag(tag)
114 print(f"Prepending GTs:\n{args.global_tag_prepend}")
115
116 if args.append_testing_payloads:
117 basf2.conditions.append_testing_payloads(args.append_testing_payloads)
118 print(f"Appending testing payloads (will have highest priority!)):\n{args.append_testing_payloads}")
119
120 # ------------
121 # Create path.
122 # ------------
123
124 path = basf2.create_path()
125
126 # ----------
127 # Add input.
128 # ----------
129
130 ma.inputMdst(filename=basf2.find_file("mdst16.root", "validation"),
131 path=path)
132
133 # ---------------------------------------
134 # Load standard charged stable particles,
135 # and fill corresponding particle lists.
136 # ---------------------------------------
137
138 std_charged = [
139 Belle2.Const.electron.getPDGCode(),
140 Belle2.Const.muon.getPDGCode(),
141 Belle2.Const.pion.getPDGCode(),
142 Belle2.Const.kaon.getPDGCode(),
143 Belle2.Const.proton.getPDGCode(),
144 Belle2.Const.deuteron.getPDGCode(),
145 ]
146
147 plists = [(f"{pdg.to_name(pdgId)}:my_{pdg.to_name(pdgId)}", "") for pdgId in std_charged]
148 ma.fillParticleLists(plists, path=path)
149
150 # --------------------------
151 # (Optional) truth matching.
152 # --------------------------
153
154 if args.matchTruth:
155 for plistname, _ in plists:
156 ma.matchMCTruth(plistname, path=path)
157 ma.applyCuts(plistname, "isSignal == 1", path=path)
158
159 # -------------------
160 # Global/Binary PID ?
161 # -------------------
162
163 global_pid = (args.testHyposPDGCodePair == (0, 0))
164 binary_pid = not global_pid
165
166 # ----------------------
167 # Apply charged Pid MVA.
168 # ----------------------
169
170 if global_pid:
171 ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
172 path=path,
173 trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Multiclass,
174 chargeIndependent=args.chargeIndependent)
175 if args.add_ecl_only:
176 ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
177 path=path,
178 trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Multiclass)
179 elif binary_pid:
180 for s, b in args.testHyposPDGCodePair:
181 ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
182 path=path,
183 trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Classification,
184 binaryHypoPDGCodes=(s, b),
185 chargeIndependent=args.chargeIndependent)
186 if args.add_ecl_only:
187 ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
188 path=path,
189 trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Classification,
190 binaryHypoPDGCodes=(s, b))
191
192 if args.debug:
193 for m in path.modules():
194 if "ChargedPidMVA" in m.name():
195 m.logging.log_level = basf2.LogLevel.DEBUG
196 m.logging.debug_level = args.debug
197
198 # ---------------
199 # Make an NTuple.
200 # ---------------
201
202 if global_pid:
203
204 append = "_vs_".join(map(str, std_charged))
205
206 variables = [f"pidChargedBDTScore({pdgId}, ALL)" for pdgId in std_charged]
207 if args.add_ecl_only:
208 variables += [f"pidChargedBDTScore({pdgId}, ECL)" for pdgId in std_charged]
209
210 elif binary_pid:
211
212 append = "__".join([f"{s}_vs_{b}" for s, b in args.testHyposPDGCodePair])
213
214 variables = [f"pidPairChargedBDTScore({s}, {b}, ALL)" for s, b in args.testHyposPDGCodePair]
215 if args.add_ecl_only:
216 variables += [f"pidPairChargedBDTScore({s}, {b}, ECL)" for s, b in args.testHyposPDGCodePair]
217
218 filename = f"chargedpid_ntuples__{append}.root"
219
220 for plistname, _ in plists:
221
222 # ROOT doesn't like non-alphanum chars...
223 treename = re.sub(r"[\W]+", "", plistname.split(':')[1])
224
225 if global_pid:
226 ma.variablesToNtuple(decayString=plistname,
227 variables=variables,
228 treename=treename,
229 filename=filename,
230 path=path)
231 elif binary_pid:
232 ma.variablesToNtuple(decayString=plistname,
233 variables=variables,
234 treename=treename,
235 filename=filename,
236 path=path)
237
238 # -----------------
239 # Monitor progress.
240 # -----------------
241
242 progress = basf2.register_module("Progress")
243 path.add_module(progress)
244
245 # ---------------
246 # Process events.
247 # ---------------
248
249 # Start processing of modules.
250 basf2.process(path)
STL class.