Belle II Software release-05-02-19
svd_time.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pathlib import Path
import sys

import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from prompt import ValidationSettings
import svd.validation_utils as vu

import ROOT as r
r.PyConfig.IgnoreCommandLineOptions = True

matplotlib.use('Agg')
plt.style.use("belle2")

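# Register this script with the prompt validation framework (b2val-prompt-run
# discovers this ValidationSettings object); no extra downloads or expert
# configuration are needed.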
settings = ValidationSettings(name="caf_svd_time",
                              description=__doc__,
                              download_files=[],
                              expert_config=None)

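# Print a simple in-place text progress bar (60 characters wide) on stdout,
# overwriting the previous state via a carriage return.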
def progress(count, total):
    bar_len = 60
    filled_len = int(round(bar_len * count / total))
    percents = round(100 * count / total, 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write(f'[{bar}] {percents}%\r')
    sys.stdout.flush()

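# Main entry point of the validation: loop over the collector outputs, extract
# per-run quality metrics (agreement, precision, signal/background
# discrimination, entries) for each SVD time algorithm, and produce the
# per-run and summary plots.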
def run_validation(job_path, input_data_path=None, **kwargs):
    '''job_path will be replaced with path/to/calibration_results
    input_data_path will be replaced with path/to/data_path used for calibration
    e.g. /group/belle2/dataprod/Data/PromptSkim/'''

    collector_output_dir = Path(job_path) / 'SVDTimeValidation/0/collector_output/default/'
    output_dir = Path(kwargs.get('output_dir', 'SVDTimeValidation_output'))
    plots_per_run = output_dir / 'runs'

    plots_per_run.mkdir(parents=True, exist_ok=True)

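    # Recursively collect all CollectorOutput.root files produced by the
    # collector jobs.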
    files = list(collector_output_dir.glob('**/CollectorOutput.root'))

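    # Book one nested dictionary per time algorithm (CoG6, CoG3 and ELS3, as
    # provided by svd.validation_utils); each one maps run number to the
    # metric keyed by sensor group and side (from vu.names_sides).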
    agreements = {algo: {} for algo in vu.time_algorithms}
    precisions = {algo: {} for algo in vu.time_algorithms}
    discriminations = {algo: {} for algo in vu.time_algorithms}
    entries_onTracks = {algo: {} for algo in vu.time_algorithms}
    entries_eventT0 = {algo: {} for algo in vu.time_algorithms}

    roc_U = {algo: {} for algo in vu.time_algorithms}
    roc_V = {algo: {} for algo in vu.time_algorithms}

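    # Loop over all collector files; for each run and algorithm keep the
    # metrics from the file with the largest EventT0 statistics.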
    num_files = len(files)
    print(f'Looping over {num_files} files')
    progress(0, num_files)
    for count, in_file_name in enumerate(files):

        in_file = r.TFile(str(in_file_name))

        for algo in vu.time_algorithms:

            histos, exp, run = vu.get_histos(in_file, algo)

            if histos is None:
                print(f'Skipping file {in_file_name} for {algo}')
                continue

            # If some histogram is empty (too little statistics), do not crash
            # but skip that file for that calibration.
            try:
                entries_eventT0_ = histos['eventT0'].GetEntries()
                if run not in entries_eventT0[algo] or entries_eventT0_ > entries_eventT0[algo][run]:
                    agreements[algo][run] = {key: vu.get_agreament(histos['eventT0'], h_diff)
                                             for key, h_diff in histos['diff'].items()}
                    precisions[algo][run] = {key: vu.get_precision(h_diff)
                                             for key, h_diff in histos['diff'].items()}
                    discriminations[algo][run] = {key: vu.get_roc_auc(histos['onTracks'][key], histos['offTracks'][key])
                                                  for key in histos['onTracks']}
                    entries_onTracks[algo][run] = {key: val.GetEntries() for key, val in histos['onTracks'].items()}
                    entries_eventT0[algo][run] = entries_eventT0_

                    vu.make_combined_plot('*U', histos,
                                          title=f'exp {exp} run {run} U {algo}')
                    plt.savefig(plots_per_run / f'{exp}_{run}_U_{algo}.pdf')
                    plt.close()

                    vu.make_combined_plot('*V', histos,
                                          title=f'exp {exp} run {run} V {algo}')
                    plt.savefig(plots_per_run / f'{exp}_{run}_V_{algo}.pdf')
                    plt.close()

                    roc_U[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*U'),
                                                   vu.get_combined(histos['offTracks'], '*U'))
                    roc_V[algo][run] = vu.make_roc(vu.get_combined(histos['onTracks'], '*V'),
                                                   vu.get_combined(histos['offTracks'], '*V'))
            except AttributeError:
                print(f'Skipping file {in_file_name} for {algo}')
                continue

        in_file.Close()

        # Show the progress
        progress(count+1, num_files)

    print()

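    # Flatten the nested dictionaries into one table with one row per
    # (run, sensor group) pair; the side (U/V) is the last character of the
    # group name.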
    dd = {}
    runs = sorted(agreements[vu.time_algorithms[0]])
    dd['run'] = sum([[i]*len(vu.names_sides) for i in runs], [])
    dd['name'] = vu.names_sides*len(runs)
    dd['side'] = [i[-1] for i in dd['name']]

    for algo in vu.time_algorithms:
        dd[f'agreement_{algo}'] = [agreements[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
        dd[f'precision_{algo}'] = [precisions[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
        dd[f'discrimination_{algo}'] = [discriminations[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
        dd[f'entries_onTracks_{algo}'] = [entries_onTracks[algo][run][side] for run, side in zip(dd['run'], dd['name'])]
        dd[f'entries_eventT0_{algo}'] = [entries_eventT0[algo][run] for run, side in zip(dd['run'], dd['name'])]

    # Make ROC plots
    for run in runs:
        plt.figure()
        plt.plot(*roc_U['CoG6'][run], 'k-', label='CoG6 U')
        plt.plot(*roc_V['CoG6'][run], 'k:', label='CoG6 V')
        plt.plot(*roc_U['CoG3'][run], 'b-', label='CoG3 U')
        plt.plot(*roc_V['CoG3'][run], 'b:', label='CoG3 V')
        plt.plot(*roc_U['ELS3'][run], 'r-', label='ELS3 U')
        plt.plot(*roc_V['ELS3'][run], 'r:', label='ELS3 V')
        plt.legend(loc='lower left')
        plt.xlabel('sgn efficiency')
        plt.ylabel('bkg rejection')
        plt.title(f'ROC run {run}')
        plt.xlim((0, 1))
        plt.ylim((0, 1))
        plt.tight_layout()
        plt.savefig(plots_per_run / f'ROC_{run}.pdf')
        plt.close()

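    # Persist the summary table so the plots below can be regenerated without
    # rerunning the loop over the collector files (see the commented
    # read_pickle line).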
    df = pd.DataFrame(dd)
    df.to_pickle(output_dir / 'df.pkl')

    # df = pd.read_pickle('df.pkl')

    print('Making combined plots')

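    # One violin plot per metric and per algorithm, split by side (U/V); the
    # figure width scales with the number of input files so that the run
    # labels stay readable.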
    for algo in vu.time_algorithms:
        plt.figure(figsize=(6.4*max(2, num_files/30), 4.8*2))
        ax = sns.violinplot(x='run', y=f'agreement_{algo}', hue='side', data=df, split=True)
        ax.set_ylim([-2, 2])
        ax.xaxis.set_minor_locator(ticker.NullLocator())
        plt.axhline(0, color='black', linestyle='--')
        plt.axhline(0.5, color='black', linestyle=':')
        plt.axhline(-0.5, color='black', linestyle=':')
        plt.setp(ax.get_xticklabels(), rotation=90)
        plt.tight_layout()
        plt.savefig(output_dir / f'agreement_{algo}.pdf')
        plt.close()

        plt.figure(figsize=(6.4*max(2, num_files/30), 4.8*2))
        ax = sns.violinplot(x='run', y=f'precision_{algo}', hue='side', data=df, split=True)
        ax.set_ylim([0, 50])
        ax.xaxis.set_minor_locator(ticker.NullLocator())
        plt.axhline(10, color='black', linestyle=':')
        plt.axhline(20, color='black', linestyle=':')
        plt.setp(ax.get_xticklabels(), rotation=90)
        plt.tight_layout()
        plt.savefig(output_dir / f'precision_{algo}.pdf')
        plt.close()

        plt.figure(figsize=(6.4*max(2, num_files/30), 4.8*2))
        ax = sns.violinplot(x='run', y=f'discrimination_{algo}', hue='side', data=df, split=True)
        ax.set_ylim([0.5, 1])
        ax.xaxis.set_minor_locator(ticker.NullLocator())
        plt.axhline(0.8, color='black', linestyle=':')
        plt.axhline(0.9, color='black', linestyle=':')
        plt.setp(ax.get_xticklabels(), rotation=90)
        plt.tight_layout()
        plt.savefig(output_dir / f'discrimination_{algo}.pdf')
        plt.close()

        plt.figure(figsize=(6.4*max(2, num_files/30), 4.8*2))
        ax = sns.violinplot(x='run', y=f'entries_onTracks_{algo}', hue='side', data=df, split=True, cut=0)
        ax.xaxis.set_minor_locator(ticker.NullLocator())
        plt.setp(ax.get_xticklabels(), rotation=90)
        plt.tight_layout()
        plt.savefig(output_dir / f'entries_onTracks_{algo}.pdf')
        plt.close()

        plt.figure(figsize=(6.4*max(2, num_files/30), 4.8*2))
        ax = sns.violinplot(x='run', y=f'entries_eventT0_{algo}', hue='side', data=df, split=True)
        ax.xaxis.set_minor_locator(ticker.NullLocator())
        plt.setp(ax.get_xticklabels(), rotation=90)
        plt.tight_layout()
        plt.savefig(output_dir / f'entries_eventT0_{algo}.pdf')
        plt.close()

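# Allow the validation to be run standalone from the command line, in addition
# to being invoked through the prompt validation framework.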
if __name__ == '__main__':

    import argparse
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)

    # b2val-prompt-run also wants to pass input_data_path and requested_iov to
    # the script. As they are not required by this validation, they are simply
    # accepted together with calibration_results_dir and then ignored.
    parser.add_argument('calibration_results_dir',
                        help='The directory that contains the collector outputs',
                        nargs='+')

    parser.add_argument('-o', '--output_dir',
                        help='The directory where all the output will be saved',
                        default='SVDTimeValidation_output')
    args = parser.parse_args()

    run_validation(args.calibration_results_dir[0], output_dir=args.output_dir)
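# Example standalone invocation (hypothetical paths, shown only for
# illustration; assumes a basf2 environment is already set up):
#   python3 svd_time.py /path/to/calibration_results -o SVDTimeValidation_output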