Belle II Software  release-08-01-10
0_analysis_charged_pid.py
1 #!/usr/bin/env python3
2 
3 
10 
11 """
12 <header>
13  <input>PartGunChargedStableGenSim.root</input>
14  <output>ChargedPID_Validation.root,Detector_ChargedPID_Validation.root</output>
15  <contact>Sviatoslav Bilokin (sviatoslav.bilokin@desy.de)</contact>
16  <description>
17  Check the PID efficiency and fake rates
18  </description>
19 </header>
20 """
21 
22 import basf2
23 import ROOT
24 from modularAnalysis import cutAndCopyList, inputMdst, fillParticleList, variablesToHistogram
25 from reconstruction import add_reconstruction
26 from variables import variables
27 
28 INPUT_FILENAME = "../PartGunChargedStableGenSim.root"
29 GLOBALID_OUTPUT_FILENAME = "Global_ChargedPID_Validation.root"
30 DET_PID_OUTPUT_FILENAME = "Detector_ChargedPID_Validation.root"
31 
32 GLOBAL_ID_NAMES = ['electronID', 'muonID', 'pionID', 'kaonID', 'protonID']
33 PARTICLE_TYPES = ['e', 'mu', 'pi', 'K', 'p']
34 PARTICLE_PDGS = [11, 13, 211, 321, 2212]
35 DETECTORS = ['SVD', 'CDC', 'TOP', 'ARICH', 'ECL', 'KLM']
36 DETECTOR_EXPERTS = ["luigi.corona@pi.infn.it", # SVD
37  "alexander.glazov@desy.de", # CDC
38  "marko.staric@ijs.si", # TOP
39  "luka.santelj@ijs.si", # ARICH
40  "torben.ferber@kit.edu", # ECL
41  "piilonen@vt.edu" # KLM
42  ]
43 DETECTOR_ACCEPTANCE_CUTS = [
44  "inCDCAcceptance", # SVD
45  "inCDCAcceptance", # CDC
46  "inTOPAcceptance", # TOP
47  "inARICHAcceptance", # ARICH
48  "inECLAcceptance", # ECL
49  "inKLMAcceptance" # KLM
50 ]
51 
52 PID_EXPERT = "kenta.uno@kek.jp and alessandro.gaz@pd.infn.it"
53 
54 THRESHOLDS = [0.2, 0.8]
55 KIN_VAR_NAMES = ['p', 'cosTheta']
56 KIN_VAR_BINS = [(20, 0., 5), (10, -1, 1)]
57 DEFAULT_OPTIONS = "pvalue-warn=0.5"
58 
59 
60 def get_expert(pid: str, for_detector: bool = False):
61  """
62  Returns the email of expert for detector or PID.
63  """
64  for det, exp in zip(DETECTORS, DETECTOR_EXPERTS):
65  if f'_{det}' in pid:
66  return exp
67  return "sviatoslav.bilokin@desy.de"
68 
69 
70 def add_aliases():
71  """
72  Returns aliases for the detector probability variables
73  """
74  result = []
75  for pdg, name in zip(PARTICLE_PDGS, PARTICLE_TYPES):
76  for det in DETECTORS:
77  new_name = f'pidProbabilityExpert_{name}_{det}'
78  variables.addAlias(new_name, f'pidProbabilityExpert({pdg},{det})')
79  result += [new_name]
80  return result
81 
82 
83 DETECTOR_PIDS = add_aliases()
84 
85 
86 def run_b2analysis():
87  """
88  Function to produce the validation ntuples via basf2.
89  """
90  main = basf2.Path()
91  inputMdst(INPUT_FILENAME, path=main)
92  add_reconstruction(path=main)
93  track_quality_cuts = 'isSignal > 0'
94 
95  for pid, particle in zip(GLOBAL_ID_NAMES, PARTICLE_TYPES):
96  # Total rates:
97  fillParticleList(f'{particle}+:total', track_quality_cuts, path=main)
98  # Total rates for subdetector:
99  for det, cut in zip(DETECTORS, DETECTOR_ACCEPTANCE_CUTS):
100  fillParticleList(f'{particle}+:total_{det}', f'{track_quality_cuts} and {cut}', path=main)
101 
102  # Efficiency cuts:
103  for cut_val in THRESHOLDS:
104  cutAndCopyList(f'{particle}+:cut0{int(cut_val*10)}', f'{particle}+:total',
105  f'{pid} > {cut_val}', path=main)
106  for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
107  variablesToHistogram(f'{particle}+:cut0{int(cut_val*10)}',
108  (var, *bin_tuple),
109  filename=GLOBALID_OUTPUT_FILENAME,
110  path=main,
111  directory=f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}')
112  # Fake rate cuts:
113  for fake_pid in GLOBAL_ID_NAMES:
114  if fake_pid == pid:
115  continue
116  cutAndCopyList(f'{particle}+:fake_{fake_pid}_cut0{int(cut_val*10)}', f'{particle}+:total',
117  f'{fake_pid} > {cut_val}', path=main)
118  for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
119  variablesToHistogram(f'{particle}+:fake_{fake_pid}_cut0{int(cut_val*10)}',
120  (var, *bin_tuple),
121  filename=GLOBALID_OUTPUT_FILENAME,
122  path=main,
123  directory=f'fake_cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}')
124 
125  # Run detector based selection only for the lower threshold and the momentum bins:
126  det_cut_value = THRESHOLDS[0]
127  for det_pid_var in DETECTOR_PIDS:
128  # Efficiency only
129  det = [d for d in DETECTORS if d in det_pid_var][0]
130  if f'_{particle}_' in det_pid_var:
131  cutAndCopyList(f'{particle}+:eff_{det_pid_var}_cut0{int(det_cut_value*10)}', f'{particle}+:total_{det}',
132  f'{det_pid_var} > {det_cut_value}', path=main)
133  variablesToHistogram(f'{particle}+:eff_{det_pid_var}_cut0{int(det_cut_value*10)}',
134  (KIN_VAR_NAMES[0], *KIN_VAR_BINS[0]),
135  filename=DET_PID_OUTPUT_FILENAME,
136  path=main,
137  directory=f'eff_cut0{int(det_cut_value*10)}_{particle}_{det_pid_var}_{KIN_VAR_NAMES[0]}')
138  # Total rates:
139  for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
140  variablesToHistogram(f'{particle}+:total',
141  (var, *bin_tuple),
142  filename=GLOBALID_OUTPUT_FILENAME,
143  path=main,
144  directory=f'total_{particle}_{var}')
145  for det in DETECTORS:
146  variablesToHistogram(f'{particle}+:total_{det}',
147  (KIN_VAR_NAMES[0], *KIN_VAR_BINS[0]),
148  filename=DET_PID_OUTPUT_FILENAME,
149  path=main,
150  directory=f'total_{det}_{particle}_{KIN_VAR_NAMES[0]}')
151 
152  main.add_module('Progress')
153  basf2.process(main)
154  print(basf2.statistics)
155 
156 
157 def add_global_plots():
158  """
159  Adds TEfficiency objects to the produced ntuple for global PID.
160  """
161  root_file = ROOT.TFile(GLOBALID_OUTPUT_FILENAME, "UPDATE")
162  for pid, particle in zip(GLOBAL_ID_NAMES, PARTICLE_TYPES):
163  for var in KIN_VAR_NAMES:
164  total_obs_th1 = root_file.Get(f'total_{particle}_{var}').Get(var)
165  for cut_val in THRESHOLDS:
166  eff_cut_obs_th1 = root_file.Get(f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}').Get(var)
167  teff = ROOT.TEfficiency(eff_cut_obs_th1, total_obs_th1)
168  teff.SetName(f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}_eff')
169  teff.SetTitle(f'Efficiency for {pid} > {cut_val} for {particle}; {var}; Efficiency')
170  teff.GetListOfFunctions().Add(
171  ROOT.TNamed("Description", f"Efficiency plot of {pid} for {particle}")
172  )
173  teff.GetListOfFunctions().Add(ROOT.TNamed("Check", "Efficiency should not decrease"))
174  teff.GetListOfFunctions().Add(ROOT.TNamed("Contact", PID_EXPERT))
175  options = DEFAULT_OPTIONS
176  if cut_val == THRESHOLDS[-1]:
177  options += " ,shifter"
178  teff.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", options))
179  teff.Write()
180  for fake_pid in GLOBAL_ID_NAMES:
181  if fake_pid == pid:
182  continue
183  fake_cut_obs_th1 = root_file.Get(f'fake_cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}').Get(var)
184  tfake = ROOT.TEfficiency(fake_cut_obs_th1, total_obs_th1)
185  tfake.SetName(f'cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}_fake')
186  tfake.SetTitle(f'Fake rate for {fake_pid} > {cut_val} for {particle}; {var}; Fake rate')
187  tfake.GetListOfFunctions().Add(
188  ROOT.TNamed("Description", f"Fake rate plot of {fake_pid} for {particle}")
189  )
190  tfake.GetListOfFunctions().Add(ROOT.TNamed("Check", "Fake rates should not increase"))
191  tfake.GetListOfFunctions().Add(ROOT.TNamed("Contact", PID_EXPERT))
192  tfake.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", DEFAULT_OPTIONS))
193  tfake.Write()
194  ROOT.TNamed(
195  "Description",
196  "Global Charged PID validation plots. Shifter plots are the log-likelihood PID efficiency plots \
197  for e, μ, π, K and p particles in bins of momentum and cosTheta. If any degradation of efficiency \
198  is spotted here, please take a look at the expert plots. A decrease of efficiency might indicate: \
199  a) a work to improve the data/MC agreement; b) possible decrease of fake rate levels, \
200  see the expert plots in this section; \
201  c) a problem with a sub-detector likelihood, please take a look at \
202  the expert section of detector-only PID section. ").Write()
203  root_file.Write()
204  root_file.Close()
205 
206 
207 def add_detector_plots():
208  """
209  Adds TEfficiency objects to the produced ntuple for detector PID.
210  """
211  root_file = ROOT.TFile(DET_PID_OUTPUT_FILENAME, "UPDATE")
212  var = KIN_VAR_NAMES[0]
213  cut_val = THRESHOLDS[0]
214  for det_pid_var in DETECTOR_PIDS:
215  for particle in PARTICLE_TYPES:
216  det = [d for d in DETECTORS if d in det_pid_var][0]
217  total_obs_th1 = root_file.Get(f'total_{det}_{particle}_{var}').Get(var)
218  if f'_{particle}_' in det_pid_var:
219  eff_cut_obs_th1 = root_file.Get(f'eff_cut0{int(cut_val*10)}_{particle}_{det_pid_var}_{var}').Get(var)
220  teff = ROOT.TEfficiency(eff_cut_obs_th1, total_obs_th1)
221  teff.SetName(f'cut0{int(cut_val*10)}_{particle}_{det_pid_var}_{var}_eff')
222  teff.SetTitle(f'Efficiency for {det_pid_var} > {cut_val} for {particle}; {var}; Efficiency')
223  teff.GetListOfFunctions().Add(
224  ROOT.TNamed("Description", f"Efficiency plot of {det_pid_var} for {particle}")
225  )
226  teff.GetListOfFunctions().Add(ROOT.TNamed("Check", "Efficiency should not decrease"))
227  teff.GetListOfFunctions().Add(ROOT.TNamed("Contact", get_expert(det_pid_var)))
228  # Only for experts:
229  teff.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", DEFAULT_OPTIONS))
230  teff.Write()
231  ROOT.TNamed(
232  "Description",
233  "Detector-based Charged PID validation."
234  ).Write()
235 
236  root_file.Write()
237  root_file.Close()
238 
239 
240 run_b2analysis()
241 add_global_plots()
242 add_detector_plots()