Belle II Software prerelease-11-00-00a
svd_time.py
1#!/usr/bin/env python3
2
3
10
11from pathlib import Path
12
13import pandas as pd
14import seaborn as sns
15import matplotlib
16import matplotlib.pyplot as plt
17import matplotlib.ticker as ticker
18import re
19
20from prompt import ValidationSettings
21import svd.validation_utils as vu
22
23import ROOT as r
24r.PyConfig.IgnoreCommandLineOptions = True
25r.gROOT.SetBatch()
26
27matplotlib.use('Agg')
28plt.style.use("belle2")
29
30
31settings = ValidationSettings(name="caf_svd_time",
32 description=__doc__,
33 download_files=[],
34 expert_config=None)
35
36
37def run_validation(job_path, input_data_path=None, **kwargs):
38 '''job_path will be replaced with path/to/calibration_results
39 input_data_path will be replaced with path/to/data_path used for calibration
40 e.g. /group/belle2/dataprod/Data/PromptSkim/'''
41
42 collector_output_dir = Path(job_path) / 'SVDTimeValidation/0/collector_output/default/'
43 output_dir = Path(kwargs.get('output_dir', 'SVDTimeValidation_output'))
44 shift_detailed = kwargs.get('shift_detailed', False)
45 plots_per_run = output_dir / 'runs'
46
47 plots_per_run.mkdir(parents=True, exist_ok=True)
48
49 files = list(collector_output_dir.glob('**/CollectorOutput.root'))
50
51 agreements = {algo: {} for algo in vu.time_algorithms}
52 precisions = {algo: {} for algo in vu.time_algorithms}
53 discriminations = {algo: {} for algo in vu.time_algorithms}
54 shift_agreements = {algo: {} for algo in vu.time_algorithms}
55 entries_onTracks = {algo: {} for algo in vu.time_algorithms}
56 entries_eventT0 = {algo: {} for algo in vu.time_algorithms}
57 t0_agreements = {algo: {} for algo in vu.time_algorithms}
58 t0_differences = {algo: {} for algo in vu.time_algorithms}
59 absolute_shifts_values = {algo: {} for algo in vu.time_algorithms}
60
61 roc_U = {algo: {} for algo in vu.time_algorithms}
62 roc_V = {algo: {} for algo in vu.time_algorithms}
63
64 CollectorHistograms = vu.get_merged_collector_histograms(files)
65
66 max_total_run = 0
67 total_item = 0
68 for algo in CollectorHistograms:
69 for exp in CollectorHistograms[algo]:
70 nRun = len(CollectorHistograms[algo][exp])
71 total_item += nRun
72 if nRun > max_total_run:
73 max_total_run = nRun
74 total_length = max_total_run * len(vu.time_algorithms)
75
76 print(f'Looping over {total_item} items')
77 count = 0
78 vu.progress(0, total_item)
79
80 shift_histos = {}
81 shift_histos_merged_over_ladder = {}
82
83 for algo in CollectorHistograms:
84 shift_histos[algo] = {}
85 shift_histos_merged_over_ladder[algo] = {}
86 for exp in CollectorHistograms[algo]:
87 for run in CollectorHistograms[algo][exp]:
88 # print(f"working with : algo {algo} exp {exp} run {run}")
89 histos = vu.get_histos(CollectorHistograms[algo][exp][run])
90
91 if histos is None:
92 print(f'Skipping file algo {algo} exp {exp} run {run}')
93 continue
94
95 # if some histogram is empty (too little stat) do not crash but skip that file for that calibration
96 try:
97 entries_eventT0_ = histos['eventT0'].GetEntries()
98
99 if run not in entries_eventT0[algo] or entries_eventT0_ > entries_eventT0[algo][run]:
100 agreements[algo][run] = {key: vu.get_agreement(histos['eventT0'], h_diff)
101 for key, h_diff in histos['diff'].items()}
102
103 t0_agreements[algo][run] = vu.get_agreement2(histos['CDCeventT0'], histos['SVDeventT0'], min_entries=10)
104 t0_differences[algo][run] = vu.get_difference(histos['SVDeventT0'], histos['CDCeventT0'], min_entries=10)
105
106 precisions[algo][run] = {key: vu.get_precision(h_diff)
107 for key, h_diff in histos['diff'].items()}
108 discriminations[algo][run] = {key: vu.get_roc_auc(histos['onTracks'][key], histos['offTracks'][key])
109 for key in histos['onTracks']}
110 shift_agreements[algo][run] = {key: vu.get_shift_agreement(hShift)
111 for key, hShift in histos['timeShifter'].items()}
112 entries_onTracks[algo][run] = {key: val.GetEntries() for key, val in histos['onTracks'].items()}
113 entries_eventT0[algo][run] = entries_eventT0_
114 absolute_shifts_values[algo][run] = histos["absoluteShift"]
115
116 if shift_detailed:
117 for key, hShift in histos['timeShifter'].items():
118 if key in shift_histos[algo]:
119 shift_histos[algo][key].Add(hShift)
120 else:
121 shift_histos[algo][key] = hShift.Clone()
122 shift_histos[algo][key].SetDirectory(0)
123 sensor_id = re.findall(r'\d+', key) + [key[-1]]
124 keyGroup = f'L{sensor_id[0]}S{sensor_id[2]}{sensor_id[3]}'
125 if keyGroup in shift_histos_merged_over_ladder[algo]:
126 shift_histos_merged_over_ladder[algo][keyGroup].Add(hShift)
127 else:
128 shift_histos_merged_over_ladder[algo][keyGroup] = hShift.Clone()
129 shift_histos_merged_over_ladder[algo][keyGroup].SetDirectory(0)
130 vu.make_combined_plot('*U', histos,
131 title=f'exp {exp} run {run} U {algo}')
132 plt.savefig(plots_per_run / f'{exp}_{run}_U_{algo}.pdf')
133 plt.close()
134 vu.make_combined_plot('*V', histos,
135 title=f'exp {exp} run {run} V {algo}')
136 plt.savefig(plots_per_run / f'{exp}_{run}_V_{algo}.pdf')
137 plt.close()
138 roc_U[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*U'),
139 vu.get_combined(histos['offTracks'], '*U'))
140 roc_V[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*V'),
141 vu.get_combined(histos['offTracks'], '*V'))
142 except AttributeError:
143 print(f'Skipping file algo {algo} exp {exp} run {run}')
144 continue
145 # Free-up memory manually as I used `SetDirectory(0)`
146 for t0 in ['eventT0', 'CDCeventT0', 'SVDeventT0']:
147 h = histos.pop(t0)
148 h.__python_owns__ = False
149 h.Delete()
150
151 # absoluteShift is not a histogram, so just remove it from the dict without trying to delete
152 histos.pop("absoluteShift", None)
153
154 for histo_dict in histos.values():
155 for hh in histo_dict.values():
156 hh.__python_owns__ = False
157 hh.Delete()
158 del histos
159 for key, hh in CollectorHistograms[algo][exp][run].items():
160 if key not in ['hEventT0', 'hEventT0FromCDC', 'hEventT0FromSVD', 'hAbsoluteShiftValues']:
161 hh.__python_owns__ = False
162 hh.Delete()
163 vu.progress(count + 1, total_item)
164 count += 1
165
166 print()
167
168 if shift_detailed:
169 for algo, KeyHisto in shift_histos.items():
170 c2 = r.TCanvas("c2", "c2", 640, 480)
171 outPDF = f"{output_dir}/shift_histograms_{algo}.pdf"
172 c2.Print(outPDF + "[")
173 onePad = r.TPad("onePad", "onePad", 0, 0, 1, 1)
174 onePad.SetMargin(0.1, 0.2, 0.1, 0.1)
175 onePad.SetNumber(1)
176 onePad.Draw()
177 onePad.cd()
178 hShiftHisto = vu.get_shift_plot(shift_histos_merged_over_ladder[algo])
179 hShiftHisto.Draw('COLZ')
180 c2.Print(outPDF, "Title:" + hShiftHisto.GetName())
181
182 c1 = r.TCanvas("c1", "c1", 640, 480)
183 topPad = r.TPad("topPad", "topPad", 0, 0.5, 1, 1)
184 btmPad = r.TPad("btmPad", "btmPad", 0, 0, 1, 0.5)
185 topPad.SetMargin(0.1, 0.1, 0, 0.149)
186 btmPad.SetMargin(0.1, 0.1, 0.303, 0)
187 topPad.SetNumber(1)
188 btmPad.SetNumber(2)
189 topPad.Draw()
190 btmPad.Draw()
191 isOdd = True
192 for key, hShift in KeyHisto.items():
193 hShift.SetStats(0)
194 for yn in range(hShift.GetNbinsY()):
195 norm = (hShift.ProjectionX("tmp", yn + 1, yn + 1, "")).GetMaximum()
196 if norm <= 0:
197 continue
198 for xn in range(hShift.GetNbinsX()):
199 hShift.SetBinContent(xn + 1, yn + 1, hShift.GetBinContent(xn + 1, yn + 1) / norm)
200 if isOdd:
201 topPad.cd()
202 hShift.Draw("colz")
203 else:
204 btmPad.cd()
205 hShift.Draw("colz")
206 c1.Print(outPDF, "Title:" + hShift.GetName())
207 isOdd = not isOdd
208 c1.Print(outPDF + "]")
209
210 dd = {}
211 runs = sorted(agreements[vu.time_algorithms[0]])
212 dd['run'] = sum([[i]*len(vu.names_sides) for i in runs], [])
213 dd['name'] = vu.names_sides*len(runs)
214 dd['side'] = [i[-1] for i in dd['name']]
215
216 for algo in vu.time_algorithms:
217 dd[f'agreement_{algo}'] = [agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
218 dd[f'precision_{algo}'] = [precisions[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
219 dd[f'discrimination_{algo}'] = [discriminations[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
220 dd[f'shift_agreement_{algo}'] = [shift_agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
221 dd[f'entries_onTracks_{algo}'] = [entries_onTracks[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
222 dd[f'entries_eventT0_{algo}'] = [entries_eventT0[algo][run] for run, side in zip(dd['run'], dd['name'])]
223 dd[f'T0_agreement_{algo}'] = [t0_agreements[algo][run] for run, side in zip(dd['run'], dd['name'])]
224 dd[f'T0_difference_{algo}'] = [t0_differences[algo][run] for run, side in zip(dd['run'], dd['name'])]
225
226 # Make ROC plots
227 for run in runs:
228 plt.figure()
229 plt.plot(*roc_U['CoG6'][run], 'k-', label='CoG6 U')
230 plt.plot(*roc_V['CoG6'][run], 'k:', label='CoG6 V')
231 plt.plot(*roc_U['CoG3'][run], 'b-', label='CoG3 U')
232 plt.plot(*roc_V['CoG3'][run], 'b:', label='CoG3 V')
233 plt.plot(*roc_U['ELS3'][run], 'r-', label='ELS3 U')
234 plt.plot(*roc_V['ELS3'][run], 'r:', label='ELS3 V')
235 plt.legend(loc='lower left')
236 plt.xlabel('sgn efficiency')
237 plt.ylabel('bkg rejection')
238 plt.title(f'ROC run {run}')
239 plt.xlim((0, 1))
240 plt.ylim((0, 1))
241 plt.tight_layout()
242 plt.savefig(plots_per_run / f'ROC_{run}.pdf')
243 plt.close()
244
245 df = pd.DataFrame(dd)
246 df.to_pickle(output_dir / 'df.pkl')
247 # df = pd.read_pickle('df.pkl')
248
249 absolute_shifts_dict = {}
250 absolute_shifts_dict['run'] = sum([[i]*len(vu.names_layer_sides) for i in runs], []) # 4 layers * 2 sides
251 absolute_shifts_dict['name'] = vu.names_layer_sides*len(runs)
252 for algo in vu.time_algorithms:
253 absolute_shifts_dict[f'absolute_shift_values_{algo}'] = [
254 absolute_shifts_values[algo][run][layer_side] for run, layer_side in zip(
255 absolute_shifts_dict['run'], absolute_shifts_dict['name'])]
256
257 absolute_shifts_df = pd.DataFrame(absolute_shifts_dict)
258
259 print('Making combined plots')
260
261 for algo in vu.time_algorithms:
262 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
263 ax = sns.violinplot(x='run', y=f'agreement_{algo}', hue='side', data=df, split=True)
264 ax.set_ylim([-2, 2])
265 ax.xaxis.set_minor_locator(ticker.NullLocator())
266 plt.axhline(0, color='black', linestyle='--')
267 plt.axhline(0.5, color='black', linestyle=':')
268 plt.axhline(-0.5, color='black', linestyle=':')
269 plt.setp(ax.get_xticklabels(), rotation=90)
270 plt.tight_layout()
271 plt.savefig(output_dir / f'agreement_{algo}.pdf')
272 plt.close()
273
274 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
275 ax = sns.violinplot(x='run', y=f'precision_{algo}', hue='side', data=df, split=True)
276 ax.set_ylim([0, 50])
277 ax.xaxis.set_minor_locator(ticker.NullLocator())
278 plt.axhline(10, color='black', linestyle=':')
279 plt.axhline(20, color='black', linestyle=':')
280 plt.setp(ax.get_xticklabels(), rotation=90)
281 plt.tight_layout()
282 plt.savefig(output_dir / f'precision_{algo}.pdf')
283 plt.close()
284
285 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
286 ax = sns.violinplot(x='run', y=f'discrimination_{algo}', hue='side', data=df, split=True)
287 ax.set_ylim([0.5, 1])
288 ax.xaxis.set_minor_locator(ticker.NullLocator())
289 plt.axhline(0.8, color='black', linestyle=':')
290 plt.axhline(0.9, color='black', linestyle=':')
291 plt.setp(ax.get_xticklabels(), rotation=90)
292 plt.tight_layout()
293 plt.savefig(output_dir / f'discrimination_{algo}.pdf')
294 plt.close()
295
296 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
297 ax = sns.violinplot(x='run', y=f'shift_agreement_{algo}', hue='side', data=df, split=True, cut=0)
298 ax.xaxis.set_minor_locator(ticker.NullLocator())
299 ax.set_ylim([0.0, 3.5])
300 plt.axhline(0, color='black', linestyle='--')
301 plt.axhline(0.5, color='black', linestyle=':')
302 plt.axhline(1.0, color='black', linestyle=':')
303 plt.axhline(2.0, color='black', linestyle=':')
304 plt.setp(ax.get_xticklabels(), rotation=90)
305 plt.tight_layout()
306 plt.savefig(output_dir / f'shift_agreement_{algo}.pdf')
307 plt.close()
308
309 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
310 ax = sns.violinplot(x='run', y=f'entries_onTracks_{algo}', hue='side', data=df, split=True, cut=0)
311 ax.xaxis.set_minor_locator(ticker.NullLocator())
312 plt.setp(ax.get_xticklabels(), rotation=90)
313 plt.tight_layout()
314 plt.savefig(output_dir / f'entries_onTracks_{algo}.pdf')
315 plt.close()
316
317 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
318 ax = sns.violinplot(x='run', y=f'entries_eventT0_{algo}', hue='side', data=df, split=True)
319 ax.xaxis.set_minor_locator(ticker.NullLocator())
320 plt.setp(ax.get_xticklabels(), rotation=90)
321 plt.tight_layout()
322 plt.savefig(output_dir / f'entries_eventT0_{algo}.pdf')
323 plt.close()
324
325 for metric in ['agreement', 'difference']:
326 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
327 ax = sns.violinplot(x='run', y=f'T0_{metric}_{algo}', data=df, split=True)
328 ax.set_ylim([-2, 2])
329 ax.xaxis.set_minor_locator(ticker.NullLocator())
330 plt.axhline(0, color='black', linestyle='--')
331 plt.axhline(0.5, color='black', linestyle=':')
332 plt.axhline(-0.5, color='black', linestyle=':')
333 plt.setp(ax.get_xticklabels(), rotation=90)
334 plt.tight_layout()
335 plt.savefig(output_dir / f'T0_{metric}_{algo}.pdf')
336 plt.close()
337
338 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
339 ax = sns.scatterplot(
340 x='run', y=f'absolute_shift_values_{algo}',
341 hue='name', data=absolute_shifts_df,
342 marker='o', s=40,
343 )
344 ax.set_ylim([-5, 5])
345 run_values = sorted(absolute_shifts_df['run'].unique())
346 ax.set_xticks(run_values)
347 ax.set_xticklabels([str(r) for r in run_values])
348 ax.xaxis.set_minor_locator(ticker.NullLocator())
349 plt.axhline(0, color='black', linestyle='--')
350 plt.axhline(2, color='black', linestyle=':')
351 plt.axhline(-2, color='black', linestyle=':')
352 plt.setp(ax.get_xticklabels(), rotation=90)
353 plt.ylabel(f'absolute shift ({algo}) (ns)')
354 ax.legend(title='layer/side', ncol=2)
355 plt.tight_layout()
356 plt.savefig(output_dir / f'absolute_shift_{algo}.pdf')
357 plt.close()
358
359
360if __name__ == '__main__':
361
362 import argparse
363 parser = argparse.ArgumentParser(description=__doc__,
364 formatter_class=argparse.RawTextHelpFormatter)
365
366 # b2val-prompt-run wants to pass to the script also input_data_path
367 # and requested_iov. As they are not required by this validation I just accept
368 # them together with calibration_results_dir and then ignore them
369 parser.add_argument('calibration_results_dir',
370 help='The directory that contains the collector outputs',
371 nargs='+')
372
373 parser.add_argument('-o', '--output_dir',
374 help='The directory where all the output will be saved',
375 default='SVDTimeValidation_output')
376 parser.add_argument('-l',
377 help='Make additional pdf with details cluster size vs shift',
378 action='store_true')
379 args = parser.parse_args()
380
381 run_validation(args.calibration_results_dir[0], output_dir=args.output_dir, shift_detailed=args.l)