Belle II Software  release-08-00-04
svd_time.py
1 #!/usr/bin/env python3
2 
3 
10 
11 from pathlib import Path
12 
13 import pandas as pd
14 import seaborn as sns
15 import matplotlib
16 import matplotlib.pyplot as plt
17 import matplotlib.ticker as ticker
18 import re
19 
20 from prompt import ValidationSettings
21 import svd.validation_utils as vu
22 
23 import ROOT as r
24 r.PyConfig.IgnoreCommandLineOptions = True
25 
26 matplotlib.use('Agg')
27 plt.style.use("belle2")
28 
29 
30 settings = ValidationSettings(name="caf_svd_time",
31  description=__doc__,
32  download_files=[],
33  expert_config=None)
34 
35 
36 def run_validation(job_path, input_data_path=None, **kwargs):
37  '''job_path will be replaced with path/to/calibration_results
38  input_data_path will be replaced with path/to/data_path used for calibration
39  e.g. /group/belle2/dataprod/Data/PromptSkim/'''
40 
41  collector_output_dir = Path(job_path) / 'SVDTimeValidation/0/collector_output/default/'
42  output_dir = Path(kwargs.get('output_dir', 'SVDTimeValidation_output'))
43  plots_per_run = output_dir / 'runs'
44 
45  plots_per_run.mkdir(parents=True, exist_ok=True)
46 
47  files = list(collector_output_dir.glob('**/CollectorOutput.root'))
48 
49  agreements = {algo: {} for algo in vu.time_algorithms}
50  precisions = {algo: {} for algo in vu.time_algorithms}
51  discriminations = {algo: {} for algo in vu.time_algorithms}
52  shift_agreements = {algo: {} for algo in vu.time_algorithms}
53  entries_onTracks = {algo: {} for algo in vu.time_algorithms}
54  entries_eventT0 = {algo: {} for algo in vu.time_algorithms}
55 
56  roc_U = {algo: {} for algo in vu.time_algorithms}
57  roc_V = {algo: {} for algo in vu.time_algorithms}
58 
59  CollectorHistograms = vu.get_merged_collector_histograms(files)
60 
61  max_total_run = 0
62  total_item = 0
63  for algo in CollectorHistograms:
64  for exp in CollectorHistograms[algo]:
65  nRun = len(CollectorHistograms[algo][exp])
66  total_item += nRun
67  if nRun > max_total_run:
68  max_total_run = nRun
69  total_length = max_total_run * len(vu.time_algorithms)
70 
71  print(f'Looping over {total_item} items')
72  count = 0
73  vu.progress(0, total_item)
74 
75  shift_histos = {}
76  shift_histos_merged_over_ladder = {}
77 
78  for algo in CollectorHistograms:
79  shift_histos[algo] = {}
80  shift_histos_merged_over_ladder[algo] = {}
81  for exp in CollectorHistograms[algo]:
82  for run in CollectorHistograms[algo][exp]:
83  # print(f"working with : algo {algo} exp {exp} run {run}")
84 
85  histos = vu.get_histos(CollectorHistograms[algo][exp][run])
86 
87  if histos is None:
88  print(f'Skipping file algo {algo} exp {exp} run {run}')
89  continue
90 
91  # if some histogram is empty (too little stat) do not crash but skip that file for that calibration
92  try:
93  entries_eventT0_ = histos['eventT0'].GetEntries()
94  if run not in entries_eventT0[algo] or entries_eventT0_ > entries_eventT0[algo][run]:
95  agreements[algo][run] = {key: vu.get_agreement(histos['eventT0'], h_diff)
96  for key, h_diff in histos['diff'].items()}
97  precisions[algo][run] = {key: vu.get_precision(h_diff)
98  for key, h_diff in histos['diff'].items()}
99  discriminations[algo][run] = {key: vu.get_roc_auc(histos['onTracks'][key], histos['offTracks'][key])
100  for key in histos['onTracks']}
101  shift_agreements[algo][run] = {key: vu.get_shift_agreement(hShift)
102  for key, hShift in histos['timeShifter'].items()}
103  entries_onTracks[algo][run] = {key: val.GetEntries() for key, val in histos['onTracks'].items()}
104  entries_eventT0[algo][run] = entries_eventT0_
105 
106  for key, hShift in histos['timeShifter'].items():
107  if key in shift_histos[algo]:
108  shift_histos[algo][key].Add(hShift)
109  else:
110  shift_histos[algo][key] = hShift.Clone()
111  shift_histos[algo][key].SetDirectory(0)
112  sensor_id = re.findall(r'\d+', key) + [key[-1]]
113  keyGroup = f'L{sensor_id[0]}S{sensor_id[2]}{sensor_id[3]}'
114  if keyGroup in shift_histos_merged_over_ladder[algo]:
115  shift_histos_merged_over_ladder[algo][keyGroup].Add(hShift)
116  else:
117  shift_histos_merged_over_ladder[algo][keyGroup] = hShift.Clone()
118  shift_histos_merged_over_ladder[algo][keyGroup].SetDirectory(0)
119 
120  vu.make_combined_plot('*U', histos,
121  title=f'exp {exp} run {run} U {algo}')
122  plt.savefig(plots_per_run / f'{exp}_{run}_U_{algo}.pdf')
123  plt.close()
124 
125  vu.make_combined_plot('*V', histos,
126  title=f'exp {exp} run {run} V {algo}')
127  plt.savefig(plots_per_run / f'{exp}_{run}_V_{algo}.pdf')
128  plt.close()
129 
130  roc_U[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*U'),
131  vu.get_combined(histos['offTracks'], '*U'))
132  roc_V[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*V'),
133  vu.get_combined(histos['offTracks'], '*V'))
134  except AttributeError:
135  print(f'Skipping file algo {algo} exp {exp} run {run}')
136  continue
137  vu.progress(count + 1, total_item)
138  count += 1
139 
140  print()
141 
142  for algo, KeyHisto in shift_histos.items():
143  c2 = r.TCanvas("c2", "c2", 640, 480)
144  outPDF = f"{output_dir}/shift_histograms_{algo}.pdf"
145  c2.Print(outPDF + "[")
146  onePad = r.TPad("onePad", "onePad", 0, 0, 1, 1)
147  onePad.SetMargin(0.1, 0.2, 0.1, 0.1)
148  onePad.SetNumber(1)
149  onePad.Draw()
150  onePad.cd()
151  hShiftHisto = vu.get_shift_plot(shift_histos_merged_over_ladder[algo])
152  hShiftHisto.Draw('COLZ')
153  c2.Print(outPDF, "Title:" + hShiftHisto.GetName())
154 
155  c1 = r.TCanvas("c1", "c1", 640, 480)
156  topPad = r.TPad("topPad", "topPad", 0, 0.5, 1, 1)
157  btmPad = r.TPad("btmPad", "btmPad", 0, 0, 1, 0.5)
158  topPad.SetMargin(0.1, 0.1, 0, 0.149)
159  btmPad.SetMargin(0.1, 0.1, 0.303, 0)
160  topPad.SetNumber(1)
161  btmPad.SetNumber(2)
162  topPad.Draw()
163  btmPad.Draw()
164  isOdd = True
165  for key, hShift in KeyHisto.items():
166  hShift.SetStats(0)
167  for yn in range(hShift.GetNbinsY()):
168  norm = (hShift.ProjectionX("tmp", yn + 1, yn + 1, "")).GetMaximum()
169  if norm <= 0:
170  continue
171  for xn in range(hShift.GetNbinsX()):
172  hShift.SetBinContent(xn + 1, yn + 1, hShift.GetBinContent(xn + 1, yn + 1) / norm)
173  if isOdd:
174  topPad.cd()
175  hShift.Draw("colz")
176  else:
177  btmPad.cd()
178  hShift.Draw("colz")
179  c1.Print(outPDF, "Title:" + hShift.GetName())
180  isOdd = not isOdd
181  c1.Print(outPDF + "]")
182 
183  dd = {}
184  runs = sorted(agreements[vu.time_algorithms[0]])
185  dd['run'] = sum([[i]*len(vu.names_sides) for i in runs], [])
186  dd['name'] = vu.names_sides*len(runs)
187  dd['side'] = [i[-1] for i in dd['name']]
188 
189  for algo in vu.time_algorithms:
190  dd[f'agreement_{algo}'] = [agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
191  dd[f'precision_{algo}'] = [precisions[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
192  dd[f'discrimination_{algo}'] = [discriminations[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
193  dd[f'shift_agreement_{algo}'] = [shift_agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
194  dd[f'entries_onTracks_{algo}'] = [entries_onTracks[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
195  dd[f'entries_eventT0_{algo}'] = [entries_eventT0[algo][run] for run, side in zip(dd['run'], dd['name'])]
196 
197  # Make ROC plots
198  for run in runs:
199  plt.figure()
200  plt.plot(*roc_U['CoG6'][run], 'k-', label='CoG6 U')
201  plt.plot(*roc_V['CoG6'][run], 'k:', label='CoG6 V')
202  plt.plot(*roc_U['CoG3'][run], 'b-', label='CoG3 U')
203  plt.plot(*roc_V['CoG3'][run], 'b:', label='CoG3 V')
204  plt.plot(*roc_U['ELS3'][run], 'r-', label='ELS3 U')
205  plt.plot(*roc_V['ELS3'][run], 'r:', label='ELS3 V')
206  plt.legend(loc='lower left')
207  plt.xlabel('sgn efficiency')
208  plt.ylabel('bkg rejection')
209  plt.title(f'ROC run {run}')
210  plt.xlim((0, 1))
211  plt.ylim((0, 1))
212  plt.tight_layout()
213  plt.savefig(plots_per_run / f'ROC_{run}.pdf')
214  plt.close()
215 
216  df = pd.DataFrame(dd)
217  df.to_pickle(output_dir / 'df.pkl')
218 
219  # df = pd.read_pickle('df.pkl')
220 
221  print('Making combined plots')
222 
223  for algo in vu.time_algorithms:
224  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
225  ax = sns.violinplot(x='run', y=f'agreement_{algo}', hue='side', data=df, split=True)
226  ax.set_ylim([-2, 2])
227  ax.xaxis.set_minor_locator(ticker.NullLocator())
228  plt.axhline(0, color='black', linestyle='--')
229  plt.axhline(0.5, color='black', linestyle=':')
230  plt.axhline(-0.5, color='black', linestyle=':')
231  plt.setp(ax.get_xticklabels(), rotation=90)
232  plt.tight_layout()
233  plt.savefig(output_dir / f'agreement_{algo}.pdf')
234  plt.close()
235 
236  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
237  ax = sns.violinplot(x='run', y=f'precision_{algo}', hue='side', data=df, split=True)
238  ax.set_ylim([0, 50])
239  ax.xaxis.set_minor_locator(ticker.NullLocator())
240  plt.axhline(10, color='black', linestyle=':')
241  plt.axhline(20, color='black', linestyle=':')
242  plt.setp(ax.get_xticklabels(), rotation=90)
243  plt.tight_layout()
244  plt.savefig(output_dir / f'precision_{algo}.pdf')
245  plt.close()
246 
247  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
248  ax = sns.violinplot(x='run', y=f'discrimination_{algo}', hue='side', data=df, split=True)
249  ax.set_ylim([0.5, 1])
250  ax.xaxis.set_minor_locator(ticker.NullLocator())
251  plt.axhline(0.8, color='black', linestyle=':')
252  plt.axhline(0.9, color='black', linestyle=':')
253  plt.setp(ax.get_xticklabels(), rotation=90)
254  plt.tight_layout()
255  plt.savefig(output_dir / f'discrimination_{algo}.pdf')
256  plt.close()
257 
258  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
259  ax = sns.violinplot(x='run', y=f'shift_agreement_{algo}', hue='side', data=df, split=True, cut=0)
260  ax.xaxis.set_minor_locator(ticker.NullLocator())
261  ax.set_ylim([0.0, 3.5])
262  plt.axhline(0, color='black', linestyle='--')
263  plt.axhline(0.5, color='black', linestyle=':')
264  plt.axhline(1.0, color='black', linestyle=':')
265  plt.axhline(2.0, color='black', linestyle=':')
266  plt.setp(ax.get_xticklabels(), rotation=90)
267  plt.tight_layout()
268  plt.savefig(output_dir / f'shift_agreement_{algo}.pdf')
269  plt.close()
270 
271  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
272  ax = sns.violinplot(x='run', y=f'entries_onTracks_{algo}', hue='side', data=df, split=True, cut=0)
273  ax.xaxis.set_minor_locator(ticker.NullLocator())
274  plt.setp(ax.get_xticklabels(), rotation=90)
275  plt.tight_layout()
276  plt.savefig(output_dir / f'entries_onTracks_{algo}.pdf')
277  plt.close()
278 
279  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
280  ax = sns.violinplot(x='run', y=f'entries_eventT0_{algo}', hue='side', data=df, split=True)
281  ax.xaxis.set_minor_locator(ticker.NullLocator())
282  plt.setp(ax.get_xticklabels(), rotation=90)
283  plt.tight_layout()
284  plt.savefig(output_dir / f'entries_eventT0_{algo}.pdf')
285  plt.close()
286 
287 
288 if __name__ == '__main__':
289 
290  import argparse
291  parser = argparse.ArgumentParser(description=__doc__,
292  formatter_class=argparse.RawTextHelpFormatter)
293 
294  # b2val-prompt-run wants to pass to the script also input_data_path
295  # and requested_iov. As they are not required by this validation I just accept
296  # them together with calibration_results_dir and then ignore them
297  parser.add_argument('calibration_results_dir',
298  help='The directory that contains the collector outputs',
299  nargs='+')
300 
301  parser.add_argument('-o', '--output_dir',
302  help='The directory where all the output will be saved',
303  default='SVDTimeValidation_output')
304  args = parser.parse_args()
305 
306  run_validation(args.calibration_results_dir[0], output_dir=args.output_dir)