Belle II Software  release-06-02-00
ChargedPidMVAModule.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # Doxygen should skip this script
12 # @cond
13 
14 """
15 This steering file fills an NTuple with the ChargedPidMVA score
16 for charged particle identification. By default, global PID info is stored,
17 meaning one signal hypothesis is tested against all others.
18 Optionally, binary PID can be stored, by testing one (or more) pair of (S,B) mass hypotheses.
19 
20 Usage:
21 
22 basf2 -i /PATH/TO/MDST/FILE.root analysis/examples/PostMdstIdentification/ChargedPidMVAModule.py -- [OPTIONS]
23 
24 Input: *_mdst_*.root
25 Output: *_ntup_*.root
26 
27 """
28 
29 import argparse
30 import re
31 from modularAnalysis import getAnalysisGlobaltag
32 
33 
34 def argparser():
35 
36  parser = argparse.ArgumentParser(description=__doc__,
37  formatter_class=argparse.RawTextHelpFormatter)
38 
39  def sb_pair(arg):
40  try:
41  s, b = map(int, arg.split(','))
42  return s, b
43  except BaseException:
44  raise argparse.ArgumentTypeError("Option string must be of the form 'S,B'")
45 
46  parser.add_argument("--matchTruth",
47  action="store_true",
48  default=False,
49  help="Apply truth-matching on particles.")
50  parser.add_argument("--testHyposPDGCodePair",
51  type=sb_pair,
52  nargs='+',
53  default=(0, 0),
54  help="Option required in binary mode.\n"
55  "A list of pdgId pairs of the (S, B) charged stable particle mass hypotheses to test.\n"
56  "Pass a space-separated list of (>= 1) S,B pdgIds, e.g.:\n"
57  "'--testHyposPDGCodePair 11,211 13,211'")
58  parser.add_argument("--addECLOnly",
59  dest="add_ecl_only",
60  action="store_true",
61  default=False,
62  help="Apply the BDT also for the ECL-only training."
63  "This will result in a separate score branch in the ntuple.")
64  parser.add_argument("--chargeIndependent",
65  action="store_true",
66  default=False,
67  help="Use a BDT trained on a sample of inclusively charged particles.")
68  parser.add_argument("--global_tag_append",
69  type=str,
70  nargs="+",
71  default=[getAnalysisGlobaltag()],
72  help="List of names of conditions DB global tag(s) to append on top of GT replay.\n"
73  "NB: these GTs will have lowest priority.\n"
74  "Pass a space-separated list of names.")
75  parser.add_argument("-d", "--debug",
76  dest="debug",
77  action="store",
78  default=0,
79  type=int,
80  choices=list(range(11, 20)),
81  help="Run the ChargedPidMVA module in debug mode. Pass the desired DEBUG level integer.")
82 
83  return parser
84 
85 
86 if __name__ == '__main__':
87 
88  args = argparser().parse_args()
89 
90  import basf2
91  import modularAnalysis as ma
92  from ROOT import Belle2
93  import pdg
94 
95  for tag in args.global_tag_append:
96  basf2.conditions.append_globaltag(tag)
97  print(f"Appending GTs:\n{args.global_tag_append}")
98 
99  # ------------
100  # Create path.
101  # ------------
102 
103  path = basf2.create_path()
104 
105  # ----------
106  # Add input.
107  # ----------
108 
109  ma.inputMdst(environmentType="default",
110  filename=basf2.find_file("mdst13.root", "validation"),
111  path=path)
112 
113  # ---------------------------------------
114  # Load standard charged stable particles,
115  # and fill corresponding particle lists.
116  # ---------------------------------------
117 
118  std_charged = [
119  Belle2.Const.electron.getPDGCode(),
120  Belle2.Const.muon.getPDGCode(),
121  Belle2.Const.pion.getPDGCode(),
122  Belle2.Const.kaon.getPDGCode(),
123  Belle2.Const.proton.getPDGCode(),
124  Belle2.Const.deuteron.getPDGCode(),
125  ]
126 
127  plists = [(f"{pdg.to_name(pdgId)}:my_{pdg.to_name(pdgId)}", "") for pdgId in std_charged]
128  ma.fillParticleLists(plists, path=path)
129 
130  # --------------------------
131  # (Optional) truth matching.
132  # --------------------------
133 
134  if args.matchTruth:
135  for plistname, _ in plists:
136  ma.matchMCTruth(plistname, path=path)
137  applyCuts(plistname, "isSignal == 1", path=path)
138 
139  # -------------------
140  # Global/Binary PID ?
141  # -------------------
142 
143  global_pid = (args.testHyposPDGCodePair == (0, 0))
144  binary_pid = not global_pid
145 
146  # ----------------------
147  # Apply charged Pid MVA.
148  # ----------------------
149 
150  if global_pid:
151  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
152  path=path,
153  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Multiclass,
154  chargeIndependent=args.chargeIndependent)
155  if args.add_ecl_only:
156  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
157  path=path,
158  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Multiclass)
159  elif binary_pid:
160  for s, b in args.testHyposPDGCodePair:
161  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
162  path=path,
163  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_Classification,
164  binaryHypoPDGCodes=(s, b),
165  chargeIndependent=args.chargeIndependent)
166  if args.add_ecl_only:
167  ma.applyChargedPidMVA(particleLists=[plistname for plistname, _ in plists],
168  path=path,
169  trainingMode=Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode.c_ECL_Classification,
170  binaryHypoPDGCodes=(s, b))
171 
172  if args.debug:
173  for m in path.modules():
174  if "ChargedPidMVA" in m.name():
175  m.logging.log_level = basf2.LogLevel.DEBUG
176  m.logging.debug_level = args.debug
177 
178  # ---------------
179  # Make an NTuple.
180  # ---------------
181 
182  if global_pid:
183 
184  append = "_vs_".join(map(str, std_charged))
185 
186  variables = [f"pidChargedBDTScore({pdgId}, ALL)" for pdgId in std_charged]
187  if args.add_ecl_only:
188  variables += [f"pidChargedBDTScore({pdgId}, ECL)" for pdgId in std_charged]
189 
190  elif binary_pid:
191 
192  append = "__".join([f"{s}_vs_{b}" for s, b in args.testHyposPDGCodePair])
193 
194  variables = [f"pidPairChargedBDTScore({s}, {b}, ALL)" for s, b in args.testHyposPDGCodePair]
195  if args.add_ecl_only:
196  variables += [f"pidPairChargedBDTScore({s}, {b}, ECL)" for s, b in args.testHyposPDGCodePair]
197 
198  filename = f"chargedpid_ntuples__{append}.root"
199 
200  for plistname, _ in plists:
201 
202  # ROOT doesn't like non-alphanum chars...
203  treename = re.sub(r"[\W]+", "", plistname.split(':')[1])
204 
205  if global_pid:
206  ma.variablesToNtuple(decayString=plistname,
207  variables=variables,
208  treename=treename,
209  filename=filename,
210  path=path)
211  elif binary_pid:
212  ma.variablesToNtuple(decayString=plistname,
213  variables=variables,
214  treename=treename,
215  filename=filename,
216  path=path)
217 
218  # -----------------
219  # Monitor progress.
220  # -----------------
221 
222  progress = basf2.register_module("Progress")
223  path.add_module(progress)
224 
225  # ---------------
226  # Process events.
227  # ---------------
228 
229  # Start processing of modules.
230  basf2.process(path)
231 
232  # Print basf2 call statistics.
233  print(basf2.statistics)
234 
235 # @endcond