Belle II Software  release-08-01-10
svd_time.py
1 #!/usr/bin/env python3
2 
3 
10 
11 from pathlib import Path
12 
13 import pandas as pd
14 import seaborn as sns
15 import matplotlib
16 import matplotlib.pyplot as plt
17 import matplotlib.ticker as ticker
18 import re
19 
20 from prompt import ValidationSettings
21 import svd.validation_utils as vu
22 
23 import ROOT as r
24 r.PyConfig.IgnoreCommandLineOptions = True
25 r.gROOT.SetBatch()
26 
27 matplotlib.use('Agg')
28 plt.style.use("belle2")
29 
30 
31 settings = ValidationSettings(name="caf_svd_time",
32  description=__doc__,
33  download_files=[],
34  expert_config=None)
35 
36 
37 def run_validation(job_path, input_data_path=None, **kwargs):
38  '''job_path will be replaced with path/to/calibration_results
39  input_data_path will be replaced with path/to/data_path used for calibration
40  e.g. /group/belle2/dataprod/Data/PromptSkim/'''
41 
42  collector_output_dir = Path(job_path) / 'SVDTimeValidation/0/collector_output/default/'
43  output_dir = Path(kwargs.get('output_dir', 'SVDTimeValidation_output'))
44  plots_per_run = output_dir / 'runs'
45 
46  plots_per_run.mkdir(parents=True, exist_ok=True)
47 
48  files = list(collector_output_dir.glob('**/CollectorOutput.root'))
49 
50  agreements = {algo: {} for algo in vu.time_algorithms}
51  precisions = {algo: {} for algo in vu.time_algorithms}
52  discriminations = {algo: {} for algo in vu.time_algorithms}
53  shift_agreements = {algo: {} for algo in vu.time_algorithms}
54  entries_onTracks = {algo: {} for algo in vu.time_algorithms}
55  entries_eventT0 = {algo: {} for algo in vu.time_algorithms}
56 
57  roc_U = {algo: {} for algo in vu.time_algorithms}
58  roc_V = {algo: {} for algo in vu.time_algorithms}
59 
60  CollectorHistograms = vu.get_merged_collector_histograms(files)
61 
62  max_total_run = 0
63  total_item = 0
64  for algo in CollectorHistograms:
65  for exp in CollectorHistograms[algo]:
66  nRun = len(CollectorHistograms[algo][exp])
67  total_item += nRun
68  if nRun > max_total_run:
69  max_total_run = nRun
70  total_length = max_total_run * len(vu.time_algorithms)
71 
72  print(f'Looping over {total_item} items')
73  count = 0
74  vu.progress(0, total_item)
75 
76  shift_histos = {}
77  shift_histos_merged_over_ladder = {}
78 
79  for algo in CollectorHistograms:
80  shift_histos[algo] = {}
81  shift_histos_merged_over_ladder[algo] = {}
82  for exp in CollectorHistograms[algo]:
83  for run in CollectorHistograms[algo][exp]:
84  # print(f"working with : algo {algo} exp {exp} run {run}")
85 
86  histos = vu.get_histos(CollectorHistograms[algo][exp][run])
87 
88  if histos is None:
89  print(f'Skipping file algo {algo} exp {exp} run {run}')
90  continue
91 
92  # if some histogram is empty (too little stat) do not crash but skip that file for that calibration
93  try:
94  entries_eventT0_ = histos['eventT0'].GetEntries()
95  if run not in entries_eventT0[algo] or entries_eventT0_ > entries_eventT0[algo][run]:
96  agreements[algo][run] = {key: vu.get_agreement(histos['eventT0'], h_diff)
97  for key, h_diff in histos['diff'].items()}
98  precisions[algo][run] = {key: vu.get_precision(h_diff)
99  for key, h_diff in histos['diff'].items()}
100  discriminations[algo][run] = {key: vu.get_roc_auc(histos['onTracks'][key], histos['offTracks'][key])
101  for key in histos['onTracks']}
102  shift_agreements[algo][run] = {key: vu.get_shift_agreement(hShift)
103  for key, hShift in histos['timeShifter'].items()}
104  entries_onTracks[algo][run] = {key: val.GetEntries() for key, val in histos['onTracks'].items()}
105  entries_eventT0[algo][run] = entries_eventT0_
106 
107  for key, hShift in histos['timeShifter'].items():
108  if key in shift_histos[algo]:
109  shift_histos[algo][key].Add(hShift)
110  else:
111  shift_histos[algo][key] = hShift.Clone()
112  shift_histos[algo][key].SetDirectory(0)
113  sensor_id = re.findall(r'\d+', key) + [key[-1]]
114  keyGroup = f'L{sensor_id[0]}S{sensor_id[2]}{sensor_id[3]}'
115  if keyGroup in shift_histos_merged_over_ladder[algo]:
116  shift_histos_merged_over_ladder[algo][keyGroup].Add(hShift)
117  else:
118  shift_histos_merged_over_ladder[algo][keyGroup] = hShift.Clone()
119  shift_histos_merged_over_ladder[algo][keyGroup].SetDirectory(0)
120 
121  vu.make_combined_plot('*U', histos,
122  title=f'exp {exp} run {run} U {algo}')
123  plt.savefig(plots_per_run / f'{exp}_{run}_U_{algo}.pdf')
124  plt.close()
125 
126  vu.make_combined_plot('*V', histos,
127  title=f'exp {exp} run {run} V {algo}')
128  plt.savefig(plots_per_run / f'{exp}_{run}_V_{algo}.pdf')
129  plt.close()
130 
131  roc_U[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*U'),
132  vu.get_combined(histos['offTracks'], '*U'))
133  roc_V[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*V'),
134  vu.get_combined(histos['offTracks'], '*V'))
135  except AttributeError:
136  print(f'Skipping file algo {algo} exp {exp} run {run}')
137  continue
138  vu.progress(count + 1, total_item)
139  count += 1
140 
141  print()
142 
143  for algo, KeyHisto in shift_histos.items():
144  c2 = r.TCanvas("c2", "c2", 640, 480)
145  outPDF = f"{output_dir}/shift_histograms_{algo}.pdf"
146  c2.Print(outPDF + "[")
147  onePad = r.TPad("onePad", "onePad", 0, 0, 1, 1)
148  onePad.SetMargin(0.1, 0.2, 0.1, 0.1)
149  onePad.SetNumber(1)
150  onePad.Draw()
151  onePad.cd()
152  hShiftHisto = vu.get_shift_plot(shift_histos_merged_over_ladder[algo])
153  hShiftHisto.Draw('COLZ')
154  c2.Print(outPDF, "Title:" + hShiftHisto.GetName())
155 
156  c1 = r.TCanvas("c1", "c1", 640, 480)
157  topPad = r.TPad("topPad", "topPad", 0, 0.5, 1, 1)
158  btmPad = r.TPad("btmPad", "btmPad", 0, 0, 1, 0.5)
159  topPad.SetMargin(0.1, 0.1, 0, 0.149)
160  btmPad.SetMargin(0.1, 0.1, 0.303, 0)
161  topPad.SetNumber(1)
162  btmPad.SetNumber(2)
163  topPad.Draw()
164  btmPad.Draw()
165  isOdd = True
166  for key, hShift in KeyHisto.items():
167  hShift.SetStats(0)
168  for yn in range(hShift.GetNbinsY()):
169  norm = (hShift.ProjectionX("tmp", yn + 1, yn + 1, "")).GetMaximum()
170  if norm <= 0:
171  continue
172  for xn in range(hShift.GetNbinsX()):
173  hShift.SetBinContent(xn + 1, yn + 1, hShift.GetBinContent(xn + 1, yn + 1) / norm)
174  if isOdd:
175  topPad.cd()
176  hShift.Draw("colz")
177  else:
178  btmPad.cd()
179  hShift.Draw("colz")
180  c1.Print(outPDF, "Title:" + hShift.GetName())
181  isOdd = not isOdd
182  c1.Print(outPDF + "]")
183 
184  dd = {}
185  runs = sorted(agreements[vu.time_algorithms[0]])
186  dd['run'] = sum([[i]*len(vu.names_sides) for i in runs], [])
187  dd['name'] = vu.names_sides*len(runs)
188  dd['side'] = [i[-1] for i in dd['name']]
189 
190  for algo in vu.time_algorithms:
191  dd[f'agreement_{algo}'] = [agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
192  dd[f'precision_{algo}'] = [precisions[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
193  dd[f'discrimination_{algo}'] = [discriminations[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
194  dd[f'shift_agreement_{algo}'] = [shift_agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
195  dd[f'entries_onTracks_{algo}'] = [entries_onTracks[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
196  dd[f'entries_eventT0_{algo}'] = [entries_eventT0[algo][run] for run, side in zip(dd['run'], dd['name'])]
197 
198  # Make ROC plots
199  for run in runs:
200  plt.figure()
201  plt.plot(*roc_U['CoG6'][run], 'k-', label='CoG6 U')
202  plt.plot(*roc_V['CoG6'][run], 'k:', label='CoG6 V')
203  plt.plot(*roc_U['CoG3'][run], 'b-', label='CoG3 U')
204  plt.plot(*roc_V['CoG3'][run], 'b:', label='CoG3 V')
205  plt.plot(*roc_U['ELS3'][run], 'r-', label='ELS3 U')
206  plt.plot(*roc_V['ELS3'][run], 'r:', label='ELS3 V')
207  plt.legend(loc='lower left')
208  plt.xlabel('sgn efficiency')
209  plt.ylabel('bkg rejection')
210  plt.title(f'ROC run {run}')
211  plt.xlim((0, 1))
212  plt.ylim((0, 1))
213  plt.tight_layout()
214  plt.savefig(plots_per_run / f'ROC_{run}.pdf')
215  plt.close()
216 
217  df = pd.DataFrame(dd)
218  df.to_pickle(output_dir / 'df.pkl')
219 
220  # df = pd.read_pickle('df.pkl')
221 
222  print('Making combined plots')
223 
224  for algo in vu.time_algorithms:
225  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
226  ax = sns.violinplot(x='run', y=f'agreement_{algo}', hue='side', data=df, split=True)
227  ax.set_ylim([-2, 2])
228  ax.xaxis.set_minor_locator(ticker.NullLocator())
229  plt.axhline(0, color='black', linestyle='--')
230  plt.axhline(0.5, color='black', linestyle=':')
231  plt.axhline(-0.5, color='black', linestyle=':')
232  plt.setp(ax.get_xticklabels(), rotation=90)
233  plt.tight_layout()
234  plt.savefig(output_dir / f'agreement_{algo}.pdf')
235  plt.close()
236 
237  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
238  ax = sns.violinplot(x='run', y=f'precision_{algo}', hue='side', data=df, split=True)
239  ax.set_ylim([0, 50])
240  ax.xaxis.set_minor_locator(ticker.NullLocator())
241  plt.axhline(10, color='black', linestyle=':')
242  plt.axhline(20, color='black', linestyle=':')
243  plt.setp(ax.get_xticklabels(), rotation=90)
244  plt.tight_layout()
245  plt.savefig(output_dir / f'precision_{algo}.pdf')
246  plt.close()
247 
248  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
249  ax = sns.violinplot(x='run', y=f'discrimination_{algo}', hue='side', data=df, split=True)
250  ax.set_ylim([0.5, 1])
251  ax.xaxis.set_minor_locator(ticker.NullLocator())
252  plt.axhline(0.8, color='black', linestyle=':')
253  plt.axhline(0.9, color='black', linestyle=':')
254  plt.setp(ax.get_xticklabels(), rotation=90)
255  plt.tight_layout()
256  plt.savefig(output_dir / f'discrimination_{algo}.pdf')
257  plt.close()
258 
259  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
260  ax = sns.violinplot(x='run', y=f'shift_agreement_{algo}', hue='side', data=df, split=True, cut=0)
261  ax.xaxis.set_minor_locator(ticker.NullLocator())
262  ax.set_ylim([0.0, 3.5])
263  plt.axhline(0, color='black', linestyle='--')
264  plt.axhline(0.5, color='black', linestyle=':')
265  plt.axhline(1.0, color='black', linestyle=':')
266  plt.axhline(2.0, color='black', linestyle=':')
267  plt.setp(ax.get_xticklabels(), rotation=90)
268  plt.tight_layout()
269  plt.savefig(output_dir / f'shift_agreement_{algo}.pdf')
270  plt.close()
271 
272  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
273  ax = sns.violinplot(x='run', y=f'entries_onTracks_{algo}', hue='side', data=df, split=True, cut=0)
274  ax.xaxis.set_minor_locator(ticker.NullLocator())
275  plt.setp(ax.get_xticklabels(), rotation=90)
276  plt.tight_layout()
277  plt.savefig(output_dir / f'entries_onTracks_{algo}.pdf')
278  plt.close()
279 
280  plt.figure(figsize=(6.4*max(2, total_length/30), 4.8*2))
281  ax = sns.violinplot(x='run', y=f'entries_eventT0_{algo}', hue='side', data=df, split=True)
282  ax.xaxis.set_minor_locator(ticker.NullLocator())
283  plt.setp(ax.get_xticklabels(), rotation=90)
284  plt.tight_layout()
285  plt.savefig(output_dir / f'entries_eventT0_{algo}.pdf')
286  plt.close()
287 
288 
289 if __name__ == '__main__':
290 
291  import argparse
292  parser = argparse.ArgumentParser(description=__doc__,
293  formatter_class=argparse.RawTextHelpFormatter)
294 
295  # b2val-prompt-run wants to pass to the script also input_data_path
296  # and requested_iov. As they are not required by this validation I just accept
297  # them together with calibration_results_dir and then ignore them
298  parser.add_argument('calibration_results_dir',
299  help='The directory that contains the collector outputs',
300  nargs='+')
301 
302  parser.add_argument('-o', '--output_dir',
303  help='The directory where all the output will be saved',
304  default='SVDTimeValidation_output')
305  args = parser.parse_args()
306 
307  run_validation(args.calibration_results_dir[0], output_dir=args.output_dir)