Belle II Software development
svd_time.py
1#!/usr/bin/env python3
2
3
10
11from pathlib import Path
12
13import pandas as pd
14import seaborn as sns
15import matplotlib
16import matplotlib.pyplot as plt
17import matplotlib.ticker as ticker
18import re
19
20from prompt import ValidationSettings
21import svd.validation_utils as vu
22
23import ROOT as r
24r.PyConfig.IgnoreCommandLineOptions = True
25r.gROOT.SetBatch()
26
27matplotlib.use('Agg')
28plt.style.use("belle2")
29
30
31settings = ValidationSettings(name="caf_svd_time",
32 description=__doc__,
33 download_files=[],
34 expert_config=None)
35
36
37def run_validation(job_path, input_data_path=None, **kwargs):
38 '''job_path will be replaced with path/to/calibration_results
39 input_data_path will be replaced with path/to/data_path used for calibration
40 e.g. /group/belle2/dataprod/Data/PromptSkim/'''
41
42 collector_output_dir = Path(job_path) / 'SVDTimeValidation/0/collector_output/default/'
43 output_dir = Path(kwargs.get('output_dir', 'SVDTimeValidation_output'))
44 plots_per_run = output_dir / 'runs'
45
46 plots_per_run.mkdir(parents=True, exist_ok=True)
47
48 files = list(collector_output_dir.glob('**/CollectorOutput.root'))
49
50 agreements = {algo: {} for algo in vu.time_algorithms}
51 precisions = {algo: {} for algo in vu.time_algorithms}
52 discriminations = {algo: {} for algo in vu.time_algorithms}
53 shift_agreements = {algo: {} for algo in vu.time_algorithms}
54 entries_onTracks = {algo: {} for algo in vu.time_algorithms}
55 entries_eventT0 = {algo: {} for algo in vu.time_algorithms}
56
57 roc_U = {algo: {} for algo in vu.time_algorithms}
58 roc_V = {algo: {} for algo in vu.time_algorithms}
59
60 CollectorHistograms = vu.get_merged_collector_histograms(files)
61
62 max_total_run = 0
63 total_item = 0
64 for algo in CollectorHistograms:
65 for exp in CollectorHistograms[algo]:
66 nRun = len(CollectorHistograms[algo][exp])
67 total_item += nRun
68 if nRun > max_total_run:
69 max_total_run = nRun
70 total_length = max_total_run * len(vu.time_algorithms)
71
72 print(f'Looping over {total_item} items')
73 count = 0
74 vu.progress(0, total_item)
75
76 shift_histos = {}
77 shift_histos_merged_over_ladder = {}
78
79 for algo in CollectorHistograms:
80 shift_histos[algo] = {}
81 shift_histos_merged_over_ladder[algo] = {}
82 for exp in CollectorHistograms[algo]:
83 for run in CollectorHistograms[algo][exp]:
84 # print(f"working with : algo {algo} exp {exp} run {run}")
85
86 histos = vu.get_histos(CollectorHistograms[algo][exp][run])
87
88 if histos is None:
89 print(f'Skipping file algo {algo} exp {exp} run {run}')
90 continue
91
92 # if some histogram is empty (too little stat) do not crash but skip that file for that calibration
93 try:
94 entries_eventT0_ = histos['eventT0'].GetEntries()
95 if run not in entries_eventT0[algo] or entries_eventT0_ > entries_eventT0[algo][run]:
96 agreements[algo][run] = {key: vu.get_agreement(histos['eventT0'], h_diff)
97 for key, h_diff in histos['diff'].items()}
98 precisions[algo][run] = {key: vu.get_precision(h_diff)
99 for key, h_diff in histos['diff'].items()}
100 discriminations[algo][run] = {key: vu.get_roc_auc(histos['onTracks'][key], histos['offTracks'][key])
101 for key in histos['onTracks']}
102 shift_agreements[algo][run] = {key: vu.get_shift_agreement(hShift)
103 for key, hShift in histos['timeShifter'].items()}
104 entries_onTracks[algo][run] = {key: val.GetEntries() for key, val in histos['onTracks'].items()}
105 entries_eventT0[algo][run] = entries_eventT0_
106
107 for key, hShift in histos['timeShifter'].items():
108 if key in shift_histos[algo]:
109 shift_histos[algo][key].Add(hShift)
110 else:
111 shift_histos[algo][key] = hShift.Clone()
112 shift_histos[algo][key].SetDirectory(0)
113 sensor_id = re.findall(r'\d+', key) + [key[-1]]
114 keyGroup = f'L{sensor_id[0]}S{sensor_id[2]}{sensor_id[3]}'
115 if keyGroup in shift_histos_merged_over_ladder[algo]:
116 shift_histos_merged_over_ladder[algo][keyGroup].Add(hShift)
117 else:
118 shift_histos_merged_over_ladder[algo][keyGroup] = hShift.Clone()
119 shift_histos_merged_over_ladder[algo][keyGroup].SetDirectory(0)
120
121 vu.make_combined_plot('*U', histos,
122 title=f'exp {exp} run {run} U {algo}')
123 plt.savefig(plots_per_run / f'{exp}_{run}_U_{algo}.pdf')
124 plt.close()
125
126 vu.make_combined_plot('*V', histos,
127 title=f'exp {exp} run {run} V {algo}')
128 plt.savefig(plots_per_run / f'{exp}_{run}_V_{algo}.pdf')
129 plt.close()
130
131 roc_U[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*U'),
132 vu.get_combined(histos['offTracks'], '*U'))
133 roc_V[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*V'),
134 vu.get_combined(histos['offTracks'], '*V'))
135 except AttributeError:
136 print(f'Skipping file algo {algo} exp {exp} run {run}')
137 continue
138 vu.progress(count + 1, total_item)
139 count += 1
140
141 print()
142
143 for algo, KeyHisto in shift_histos.items():
144 c2 = r.TCanvas("c2", "c2", 640, 480)
145 outPDF = f"{output_dir}/shift_histograms_{algo}.pdf"
146 c2.Print(outPDF + "[")
147 onePad = r.TPad("onePad", "onePad", 0, 0, 1, 1)
148 onePad.SetMargin(0.1, 0.2, 0.1, 0.1)
149 onePad.SetNumber(1)
150 onePad.Draw()
151 onePad.cd()
152 hShiftHisto = vu.get_shift_plot(shift_histos_merged_over_ladder[algo])
153 hShiftHisto.Draw('COLZ')
154 c2.Print(outPDF, "Title:" + hShiftHisto.GetName())
155
156 c1 = r.TCanvas("c1", "c1", 640, 480)
157 topPad = r.TPad("topPad", "topPad", 0, 0.5, 1, 1)
158 btmPad = r.TPad("btmPad", "btmPad", 0, 0, 1, 0.5)
159 topPad.SetMargin(0.1, 0.1, 0, 0.149)
160 btmPad.SetMargin(0.1, 0.1, 0.303, 0)
161 topPad.SetNumber(1)
162 btmPad.SetNumber(2)
163 topPad.Draw()
164 btmPad.Draw()
165 isOdd = True
166 for key, hShift in KeyHisto.items():
167 hShift.SetStats(0)
168 for yn in range(hShift.GetNbinsY()):
169 norm = (hShift.ProjectionX("tmp", yn + 1, yn + 1, "")).GetMaximum()
170 if norm <= 0:
171 continue
172 for xn in range(hShift.GetNbinsX()):
173 hShift.SetBinContent(xn + 1, yn + 1, hShift.GetBinContent(xn + 1, yn + 1) / norm)
174 if isOdd:
175 topPad.cd()
176 hShift.Draw("colz")
177 else:
178 btmPad.cd()
179 hShift.Draw("colz")
180 c1.Print(outPDF, "Title:" + hShift.GetName())
181 isOdd = not isOdd
182 c1.Print(outPDF + "]")
183
184 dd = {}
185 runs = sorted(agreements[vu.time_algorithms[0]])
186 dd['run'] = sum([[i]*len(vu.names_sides) for i in runs], [])
187 dd['name'] = vu.names_sides*len(runs)
188 dd['side'] = [i[-1] for i in dd['name']]
189
190 for algo in vu.time_algorithms:
191 dd[f'agreement_{algo}'] = [agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
192 dd[f'precision_{algo}'] = [precisions[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
193 dd[f'discrimination_{algo}'] = [discriminations[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
194 dd[f'shift_agreement_{algo}'] = [shift_agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
195 dd[f'entries_onTracks_{algo}'] = [entries_onTracks[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
196 dd[f'entries_eventT0_{algo}'] = [entries_eventT0[algo][run] for run, side in zip(dd['run'], dd['name'])]
197
198 # Make ROC plots
199 for run in runs:
200 plt.figure()
201 plt.plot(*roc_U['CoG6'][run], 'k-', label='CoG6 U')
202 plt.plot(*roc_V['CoG6'][run], 'k:', label='CoG6 V')
203 plt.plot(*roc_U['CoG3'][run], 'b-', label='CoG3 U')
204 plt.plot(*roc_V['CoG3'][run], 'b:', label='CoG3 V')
205 plt.plot(*roc_U['ELS3'][run], 'r-', label='ELS3 U')
206 plt.plot(*roc_V['ELS3'][run], 'r:', label='ELS3 V')
207 plt.legend(loc='lower left')
208 plt.xlabel('sgn efficiency')
209 plt.ylabel('bkg rejection')
210 plt.title(f'ROC run {run}')
211 plt.xlim((0, 1))
212 plt.ylim((0, 1))
213 plt.tight_layout()
214 plt.savefig(plots_per_run / f'ROC_{run}.pdf')
215 plt.close()
216
217 df = pd.DataFrame(dd)
218 df.to_pickle(output_dir / 'df.pkl')
219
220 # df = pd.read_pickle('df.pkl')
221
222 print('Making combined plots')
223
224 for algo in vu.time_algorithms:
225 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
226 ax = sns.violinplot(x='run', y=f'agreement_{algo}', hue='side', data=df, split=True)
227 ax.set_ylim([-2, 2])
228 ax.xaxis.set_minor_locator(ticker.NullLocator())
229 plt.axhline(0, color='black', linestyle='--')
230 plt.axhline(0.5, color='black', linestyle=':')
231 plt.axhline(-0.5, color='black', linestyle=':')
232 plt.setp(ax.get_xticklabels(), rotation=90)
233 plt.tight_layout()
234 plt.savefig(output_dir / f'agreement_{algo}.pdf')
235 plt.close()
236
237 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
238 ax = sns.violinplot(x='run', y=f'precision_{algo}', hue='side', data=df, split=True)
239 ax.set_ylim([0, 50])
240 ax.xaxis.set_minor_locator(ticker.NullLocator())
241 plt.axhline(10, color='black', linestyle=':')
242 plt.axhline(20, color='black', linestyle=':')
243 plt.setp(ax.get_xticklabels(), rotation=90)
244 plt.tight_layout()
245 plt.savefig(output_dir / f'precision_{algo}.pdf')
246 plt.close()
247
248 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
249 ax = sns.violinplot(x='run', y=f'discrimination_{algo}', hue='side', data=df, split=True)
250 ax.set_ylim([0.5, 1])
251 ax.xaxis.set_minor_locator(ticker.NullLocator())
252 plt.axhline(0.8, color='black', linestyle=':')
253 plt.axhline(0.9, color='black', linestyle=':')
254 plt.setp(ax.get_xticklabels(), rotation=90)
255 plt.tight_layout()
256 plt.savefig(output_dir / f'discrimination_{algo}.pdf')
257 plt.close()
258
259 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
260 ax = sns.violinplot(x='run', y=f'shift_agreement_{algo}', hue='side', data=df, split=True, cut=0)
261 ax.xaxis.set_minor_locator(ticker.NullLocator())
262 ax.set_ylim([0.0, 3.5])
263 plt.axhline(0, color='black', linestyle='--')
264 plt.axhline(0.5, color='black', linestyle=':')
265 plt.axhline(1.0, color='black', linestyle=':')
266 plt.axhline(2.0, color='black', linestyle=':')
267 plt.setp(ax.get_xticklabels(), rotation=90)
268 plt.tight_layout()
269 plt.savefig(output_dir / f'shift_agreement_{algo}.pdf')
270 plt.close()
271
272 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
273 ax = sns.violinplot(x='run', y=f'entries_onTracks_{algo}', hue='side', data=df, split=True, cut=0)
274 ax.xaxis.set_minor_locator(ticker.NullLocator())
275 plt.setp(ax.get_xticklabels(), rotation=90)
276 plt.tight_layout()
277 plt.savefig(output_dir / f'entries_onTracks_{algo}.pdf')
278 plt.close()
279
280 plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
281 ax = sns.violinplot(x='run', y=f'entries_eventT0_{algo}', hue='side', data=df, split=True)
282 ax.xaxis.set_minor_locator(ticker.NullLocator())
283 plt.setp(ax.get_xticklabels(), rotation=90)
284 plt.tight_layout()
285 plt.savefig(output_dir / f'entries_eventT0_{algo}.pdf')
286 plt.close()
287
288
289if __name__ == '__main__':
290
291 import argparse
292 parser = argparse.ArgumentParser(description=__doc__,
293 formatter_class=argparse.RawTextHelpFormatter)
294
295 # b2val-prompt-run wants to pass to the script also input_data_path
296 # and requested_iov. As they are not required by this validation I just accept
297 # them together with calibration_results_dir and then ignore them
298 parser.add_argument('calibration_results_dir',
299 help='The directory that contains the collector outputs',
300 nargs='+')
301
302 parser.add_argument('-o', '--output_dir',
303 help='The directory where all the output will be saved',
304 default='SVDTimeValidation_output')
305 args = parser.parse_args()
306
307 run_validation(args.calibration_results_dir[0], output_dir=args.output_dir)