Belle II Software development
0_analysis_charged_pid.py
1#!/usr/bin/env python3
2
3
10
11"""
12<header>
13 <input>PartGunChargedStableGenSim.root</input>
14 <output>ChargedPID_Validation.root,Detector_ChargedPID_Validation.root</output>
15 <contact>Sviatoslav Bilokin (sviatoslav.bilokin@desy.de)</contact>
16 <description>
17 Check the PID efficiency and fake rates
18 </description>
19</header>
20"""
21
22import basf2
23import ROOT
24from modularAnalysis import cutAndCopyList, inputMdst, fillParticleList, variablesToHistogram
25from reconstruction import add_reconstruction
26from variables import variables
27
28INPUT_FILENAME = "../PartGunChargedStableGenSim.root"
29GLOBALID_OUTPUT_FILENAME = "Global_ChargedPID_Validation.root"
30DET_PID_OUTPUT_FILENAME = "Detector_ChargedPID_Validation.root"
31
32GLOBAL_ID_NAMES = ['electronID', 'muonID', 'pionID', 'kaonID', 'protonID']
33PARTICLE_TYPES = ['e', 'mu', 'pi', 'K', 'p']
34PARTICLE_PDGS = [11, 13, 211, 321, 2212]
35DETECTORS = ['SVD', 'CDC', 'TOP', 'ARICH', 'ECL', 'KLM']
36DETECTOR_EXPERTS = ["luigi.corona@pi.infn.it", # SVD
37 "alexander.glazov@desy.de", # CDC
38 "marko.staric@ijs.si", # TOP
39 "luka.santelj@ijs.si", # ARICH
40 "torben.ferber@kit.edu", # ECL
41 "piilonen@vt.edu" # KLM
42 ]
43DETECTOR_ACCEPTANCE_CUTS = [
44 "inCDCAcceptance", # SVD
45 "inCDCAcceptance", # CDC
46 "inTOPAcceptance", # TOP
47 "inARICHAcceptance", # ARICH
48 "inECLAcceptance", # ECL
49 "inKLMAcceptance" # KLM
50]
51
52PID_EXPERT = "Alessandro Gaz. alessandro.gaz@pd.infn.it"
53
54THRESHOLDS = [0.2, 0.8]
55KIN_VAR_NAMES = ['p', 'cosTheta']
56KIN_VAR_BINS = [(20, 0., 5), (10, -1, 1)]
57DEFAULT_OPTIONS = "pvalue-warn=0.5"
58
59
60def get_expert(pid: str, for_detector: bool = False):
61 """
62 Returns the email of expert for detector or PID.
63 """
64 for det, exp in zip(DETECTORS, DETECTOR_EXPERTS):
65 if f'_{det}' in pid:
66 return exp
67 return "sviatoslav.bilokin@desy.de"
68
69
70def add_aliases():
71 """
72 Returns aliases for the detector probability variables
73 """
74 result = []
75 for pdg, name in zip(PARTICLE_PDGS, PARTICLE_TYPES):
76 for det in DETECTORS:
77 new_name = f'pidProbabilityExpert_{name}_{det}'
78 variables.addAlias(new_name, f'pidProbabilityExpert({pdg},{det})')
79 result += [new_name]
80 return result
81
82
83DETECTOR_PIDS = add_aliases()
84
85
86def run_b2analysis():
87 """
88 Function to produce the validation ntuples via basf2.
89 """
90 main = basf2.Path()
91 inputMdst(INPUT_FILENAME, path=main)
92 add_reconstruction(path=main)
93 track_quality_cuts = 'isSignal > 0'
94
95 for pid, particle in zip(GLOBAL_ID_NAMES, PARTICLE_TYPES):
96 # Total rates:
97 fillParticleList(f'{particle}+:total', track_quality_cuts, path=main)
98 # Total rates for subdetector:
99 for det, cut in zip(DETECTORS, DETECTOR_ACCEPTANCE_CUTS):
100 fillParticleList(f'{particle}+:total_{det}', f'{track_quality_cuts} and {cut}', path=main)
101
102 # Efficiency cuts:
103 for cut_val in THRESHOLDS:
104 cutAndCopyList(f'{particle}+:cut0{int(cut_val*10)}', f'{particle}+:total',
105 f'{pid} > {cut_val}', path=main)
106 for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
107 variablesToHistogram(f'{particle}+:cut0{int(cut_val*10)}',
108 (var, *bin_tuple),
109 filename=GLOBALID_OUTPUT_FILENAME,
110 path=main,
111 directory=f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}')
112 # Fake rate cuts:
113 for fake_pid in GLOBAL_ID_NAMES:
114 if fake_pid == pid:
115 continue
116 cutAndCopyList(f'{particle}+:fake_{fake_pid}_cut0{int(cut_val*10)}', f'{particle}+:total',
117 f'{fake_pid} > {cut_val}', path=main)
118 for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
119 variablesToHistogram(f'{particle}+:fake_{fake_pid}_cut0{int(cut_val*10)}',
120 (var, *bin_tuple),
121 filename=GLOBALID_OUTPUT_FILENAME,
122 path=main,
123 directory=f'fake_cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}')
124
125 # Run detector based selection only for the lower threshold and the momentum bins:
126 det_cut_value = THRESHOLDS[0]
127 for det_pid_var in DETECTOR_PIDS:
128 # Efficiency only
129 det = [d for d in DETECTORS if d in det_pid_var][0]
130 if f'_{particle}_' in det_pid_var:
131 cutAndCopyList(f'{particle}+:eff_{det_pid_var}_cut0{int(det_cut_value*10)}', f'{particle}+:total_{det}',
132 f'{det_pid_var} > {det_cut_value}', path=main)
133 variablesToHistogram(f'{particle}+:eff_{det_pid_var}_cut0{int(det_cut_value*10)}',
134 (KIN_VAR_NAMES[0], *KIN_VAR_BINS[0]),
135 filename=DET_PID_OUTPUT_FILENAME,
136 path=main,
137 directory=f'eff_cut0{int(det_cut_value*10)}_{particle}_{det_pid_var}_{KIN_VAR_NAMES[0]}')
138 # Total rates:
139 for var, bin_tuple in zip(KIN_VAR_NAMES, KIN_VAR_BINS):
140 variablesToHistogram(f'{particle}+:total',
141 (var, *bin_tuple),
142 filename=GLOBALID_OUTPUT_FILENAME,
143 path=main,
144 directory=f'total_{particle}_{var}')
145 for det in DETECTORS:
146 variablesToHistogram(f'{particle}+:total_{det}',
147 (KIN_VAR_NAMES[0], *KIN_VAR_BINS[0]),
148 filename=DET_PID_OUTPUT_FILENAME,
149 path=main,
150 directory=f'total_{det}_{particle}_{KIN_VAR_NAMES[0]}')
151
152 main.add_module('Progress')
153 basf2.process(main)
154 print(basf2.statistics)
155
156
157def add_global_plots():
158 """
159 Adds TEfficiency objects to the produced ntuple for global PID.
160 """
161 root_file = ROOT.TFile(GLOBALID_OUTPUT_FILENAME, "UPDATE")
162 for pid, particle in zip(GLOBAL_ID_NAMES, PARTICLE_TYPES):
163 for var in KIN_VAR_NAMES:
164 total_obs_th1 = root_file.Get(f'total_{particle}_{var}').Get(var)
165 for cut_val in THRESHOLDS:
166 eff_cut_obs_th1 = root_file.Get(f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}').Get(var)
167 teff = ROOT.TEfficiency(eff_cut_obs_th1, total_obs_th1)
168 teff.SetName(f'cut0{int(cut_val*10)}_{particle}_{pid}_{var}_eff')
169 teff.SetTitle(f'Efficiency for {pid} > {cut_val} for {particle}; {var}; Efficiency')
170 teff.GetListOfFunctions().Add(
171 ROOT.TNamed("Description", f"Efficiency plot of {pid} for {particle}")
172 )
173 teff.GetListOfFunctions().Add(ROOT.TNamed("Check", "Efficiency should not decrease"))
174 teff.GetListOfFunctions().Add(ROOT.TNamed("Contact", PID_EXPERT))
175 options = DEFAULT_OPTIONS
176 if cut_val == THRESHOLDS[-1]:
177 options += " ,shifter"
178 teff.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", options))
179 teff.Write()
180 for fake_pid in GLOBAL_ID_NAMES:
181 if fake_pid == pid:
182 continue
183 fake_cut_obs_th1 = root_file.Get(f'fake_cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}').Get(var)
184 tfake = ROOT.TEfficiency(fake_cut_obs_th1, total_obs_th1)
185 tfake.SetName(f'cut0{int(cut_val*10)}_{particle}_{fake_pid}_{var}_fake')
186 tfake.SetTitle(f'Fake rate for {fake_pid} > {cut_val} for {particle}; {var}; Fake rate')
187 tfake.GetListOfFunctions().Add(
188 ROOT.TNamed("Description", f"Fake rate plot of {fake_pid} for {particle}")
189 )
190 tfake.GetListOfFunctions().Add(ROOT.TNamed("Check", "Fake rates should not increase"))
191 tfake.GetListOfFunctions().Add(ROOT.TNamed("Contact", PID_EXPERT))
192 tfake.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", DEFAULT_OPTIONS))
193 tfake.Write()
194 ROOT.TNamed(
195 "Description",
196 "Global Charged PID validation plots. Shifter plots are the log-likelihood PID efficiency plots \
197 for e, μ, π, K and p particles in bins of momentum and cosTheta. If any degradation of efficiency \
198 is spotted here, please take a look at the expert plots. A decrease of efficiency might indicate: \
199 a) a work to improve the data/MC agreement; b) possible decrease of fake rate levels, \
200 see the expert plots in this section; \
201 c) a problem with a sub-detector likelihood, please take a look at \
202 the expert section of detector-only PID section. ").Write()
203 root_file.Write()
204 root_file.Close()
205
206
207def add_detector_plots():
208 """
209 Adds TEfficiency objects to the produced ntuple for detector PID.
210 """
211 root_file = ROOT.TFile(DET_PID_OUTPUT_FILENAME, "UPDATE")
212 var = KIN_VAR_NAMES[0]
213 cut_val = THRESHOLDS[0]
214 for det_pid_var in DETECTOR_PIDS:
215 for particle in PARTICLE_TYPES:
216 det = [d for d in DETECTORS if d in det_pid_var][0]
217 total_obs_th1 = root_file.Get(f'total_{det}_{particle}_{var}').Get(var)
218 if f'_{particle}_' in det_pid_var:
219 eff_cut_obs_th1 = root_file.Get(f'eff_cut0{int(cut_val*10)}_{particle}_{det_pid_var}_{var}').Get(var)
220 teff = ROOT.TEfficiency(eff_cut_obs_th1, total_obs_th1)
221 teff.SetName(f'cut0{int(cut_val*10)}_{particle}_{det_pid_var}_{var}_eff')
222 teff.SetTitle(f'Efficiency for {det_pid_var} > {cut_val} for {particle}; {var}; Efficiency')
223 teff.GetListOfFunctions().Add(
224 ROOT.TNamed("Description", f"Efficiency plot of {det_pid_var} for {particle}")
225 )
226 teff.GetListOfFunctions().Add(ROOT.TNamed("Check", "Efficiency should not decrease"))
227 teff.GetListOfFunctions().Add(ROOT.TNamed("Contact", get_expert(det_pid_var)))
228 # Only for experts:
229 teff.GetListOfFunctions().Add(ROOT.TNamed("MetaOptions", DEFAULT_OPTIONS))
230 teff.Write()
231 ROOT.TNamed(
232 "Description",
233 "Detector-based Charged PID validation."
234 ).Write()
235
236 root_file.Write()
237 root_file.Close()
238
239
240run_b2analysis()
241add_global_plots()
242add_detector_plots()