Belle II Software  light-2205-abys
ChargedPidMVAModule.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # Doxygen should skip this script
12 # @cond
13 
14 """
15 This steering file fills an NTuple with the ChargedPidMVA score
16 for charged particle identification. By default, global PID info is stored,
17 meaning one signal hypothesis is tested against all others.
18 Optionally, binary PID can be stored, by testing one (or more) pair of (S,B) mass hypotheses.
19 
20 Usage:
21 
22 basf2 -i /PATH/TO/MDST/FILE.root analysis/examples/PostMdstIdentification/ChargedPidMVAModule.py -- [OPTIONS]
23 
24 Input: *_mdst_*.root
25 Output: *_ntup_*.root
26 
27 """
28 
29 import argparse
30 import re
31 from modularAnalysis import getAnalysisGlobaltag
32 
33 
34 def argparser():
35 
36  parser = argparse.ArgumentParser(description=__doc__,
37  formatter_class=argparse.RawTextHelpFormatter)
38 
39  def sb_pair(arg):
40  try:
41  s, b = map(int, arg.split(','))
42  return s, b
43  except BaseException:
44  raise argparse.ArgumentTypeError("Option string must be of the form 'S,B'")
45 
46  parser.add_argument("--matchTruth",
47  action="store_true",
48  default=False,
49  help="Apply truth-matching on particles.")
50  parser.add_argument("--testHyposPDGCodePair",
51  type=sb_pair,
52  nargs='+',
53  default=(0, 0),
54  help="Option required in binary mode.\n"
55  "A list of pdgId pairs of the (S, B) charged stable particle mass hypotheses to test.\n"
56  "Pass a space-separated list of (>= 1) S,B pdgIds, e.g.:\n"
57  "'--testHyposPDGCodePair 11,211 13,211'")
58  parser.add_argument("--addECLOnly",
59  dest="add_ecl_only",
60  action="store_true",
61  default=False,
62  help="Apply the BDT also for the ECL-only training."
63  "This will result in a separate score branch in the ntuple.")
64  parser.add_argument("--chargeIndependent",
65  action="store_true",
66  default=False,
67  help="Use a BDT trained on a sample of inclusively charged particles.")
68  parser.add_argument("--global_tag_append",
69  type=str,
70  nargs="+",
71  default=[getAnalysisGlobaltag()],
72  help="List of names of conditions DB global tag(s) to append on top of GT replay.\n"
73  "NB: these GTs will have lowest priority.\n"
74  "Pass a space-separated list of names.")
75  parser.add_argument("-d", "--debug",
76  dest="debug",
77  action="store",
78  default=0,
79  type=int,
80  choices=list(range(11, 20)),
81  help="Run the ChargedPidMVA module in debug mode. Pass the desired DEBUG level integer.")
82 
83  return parser
84 
85 
86 if __name__ == '__main__':
87 
88  args = argparser().parse_args()
89 
90  import basf2
91  import modularAnalysis as ma
92  from ROOT import Belle2
93  import pdg
94 
95  for tag in args.global_tag_append:
96  basf2.conditions.append_globaltag(tag)
97  print(f"Appending GTs:\n{args.global_tag_append}")
98 
99  # ------------
100  # Create path.
101  # ------------
102 
103  path = basf2.create_path()
104 
105  # ----------
106  # Add input.
107  # ----------
108 
109  ma.inputMdst(filename=basf2.find_file("mdst13.root", "validation"),
110  path=path)
111 
112  # ---------------------------------------
113  # Load standard charged stable particles,
114  # and fill corresponding particle lists.
115  # ---------------------------------------
116 
117  std_charged = [
118  Belle2.Const.electron.getPDGCode(),
119  Belle2.Const.muon.getPDGCode(),
120  Belle2.Const.pion.getPDGCode(),
121  Belle2.Const.kaon.getPDGCode(),
122  Belle2.Const.proton.getPDGCode(),
123  Belle2.Const.deuteron.getPDGCode(),
124  ]
125 
126  plists = [(f"{pdg.to_name(pdgId)}:my_{pdg.to_name(pdgId)}", "") for pdgId in std_charged]
127  ma.fillParticleLists(plists, path=path)
128 
129  # --------------------------
130  # (Optional) truth matching.
131  # --------------------------
132 
133  if args.matchTruth:
134  for plistname, _ in plists:
135  ma.matchMCTruth(plistname, path=path)
136  ma.applyCuts(plistname, "isSignal == 1", path=path)
137 
138  # -------------------
139  # Global/Binary PID ?
140  # -------------------
141 
142  global_pid = (args.testHyposPDGCodePair == (0, 0))
143  binary_pid = not global_pid
144 
145  # ----------------------
146  # Apply charged Pid MVA.
147  # ----------------------
148 
149  if global_pid:
150  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
151  path=path,
152  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Multiclass,
153  chargeIndependent=args.chargeIndependent)
154  if args.add_ecl_only:
155  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
156  path=path,
157  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Multiclass)
158  elif binary_pid:
159  for s, b in args.testHyposPDGCodePair:
160  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
161  path=path,
162  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Classification,
163  binaryHypoPDGCodes=(s, b),
164  chargeIndependent=args.chargeIndependent)
165  if args.add_ecl_only:
166  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
167  path=path,
168  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Classification,
169  binaryHypoPDGCodes=(s, b))
170 
171  if args.debug:
172  for m in path.modules():
173  if "ChargedPidMVA" in m.name():
174  m.logging.log_level = basf2.LogLevel.DEBUG
175  m.logging.debug_level = args.debug
176 
177  # ---------------
178  # Make an NTuple.
179  # ---------------
180 
181  if global_pid:
182 
183  append = "_vs_".join(map(str, std_charged))
184 
185  variables = [f"pidChargedBDTScore({pdgId}, ALL)" for pdgId in std_charged]
186  if args.add_ecl_only:
187  variables += [f"pidChargedBDTScore({pdgId}, ECL)" for pdgId in std_charged]
188 
189  elif binary_pid:
190 
191  append = "__".join([f"{s}_vs_{b}" for s, b in args.testHyposPDGCodePair])
192 
193  variables = [f"pidPairChargedBDTScore({s}, {b}, ALL)" for s, b in args.testHyposPDGCodePair]
194  if args.add_ecl_only:
195  variables += [f"pidPairChargedBDTScore({s}, {b}, ECL)" for s, b in args.testHyposPDGCodePair]
196 
197  filename = f"chargedpid_ntuples__{append}.root"
198 
199  for plistname, _ in plists:
200 
201  # ROOT doesn't like non-alphanum chars...
202  treename = re.sub(r"[\W]+", "", plistname.split(':')[1])
203 
204  if global_pid:
205  ma.variablesToNtuple(decayString=plistname,
206  variables=variables,
207  treename=treename,
208  filename=filename,
209  path=path)
210  elif binary_pid:
211  ma.variablesToNtuple(decayString=plistname,
212  variables=variables,
213  treename=treename,
214  filename=filename,
215  path=path)
216 
217  # -----------------
218  # Monitor progress.
219  # -----------------
220 
221  progress = basf2.register_module("Progress")
222  path.add_module(progress)
223 
224  # ---------------
225  # Process events.
226  # ---------------
227 
228  # Start processing of modules.
229  basf2.process(path)
230 
231  # Print basf2 call statistics.
232  print(basf2.statistics)
233 
234 # @endcond