Belle II Software  release-06-02-00
validation_utils.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import fnmatch
13 import math
14 
15 import numpy as np
16 import matplotlib
17 import matplotlib.pyplot as plt
18 
19 import ROOT as r
20 r.PyConfig.IgnoreCommandLineOptions = True
21 
22 matplotlib.use('Agg')
23 plt.style.use("belle2")
24 
25 
26 def get_full_path(name, exp, run, base_dir):
27  return f'{base_dir}/{name}/{name}_{exp}.{run}/{name}_1'
28 
29 
30 def get_combined(histos, pattern):
31  '''
32  Sum histograms that match a certain pattern shell-style, e.g.
33  "*" all, "L3*" all layer 3, "*U" all U-sensors
34 
35  Parameters:
36  histos (dict{label : TH1}): dictionary of
37  patern (str): shell-styel matching pattern
38  '''
39 
40  good_keys = [key for key in histos.keys() if fnmatch.fnmatch(key, pattern)]
41 
42  assert len(good_keys) > 0
43 
44  h = histos[good_keys[0]].Clone()
45  h.SetName(h.GetName().split('__')[0]+f'__{pattern}')
46  h.SetTitle(h.GetName().split('__')[0]+f' in {pattern}')
47 
48  for key in good_keys[1:]:
49  h.Add(histos[key])
50 
51  return h
52 
53 
54 def get_histo_offTracks(histo_all, histo_onTracks):
55  '''
56  Substract the histogram with the clusters on tracks from the histogram with all the clusters
57  to get the histogram with the clusters off tracks
58  '''
59  try:
60  histo_offTracks = histo_all.Clone(histo_all.GetName().replace('all', 'offTracks'))
61  histo_offTracks.SetTitle(histo_offTracks.GetTitle().replace('All', 'OffTracks'))
62  histo_offTracks.Add(histo_onTracks, -1)
63  return histo_offTracks
64  except (ReferenceError, TypeError):
65  return None
66 
67 
68 def get_agreament(histo_eventT0, histo_diff, min_entries=100):
69  '''
70  Get the mean of the difference between the mean time of the clusters on tracks and
71  the mean eventT0 divided by the RMS of the event T0
72  '''
73  if histo_eventT0.GetEntries() > min_entries and histo_diff.GetEntries() > min_entries:
74  return histo_diff.GetMean()/histo_eventT0.GetRMS()
75  else:
76  return np.nan
77 
78 
79 def get_precision(histo_diff, min_entries=100):
80  '''
81  Get the RMS of the difference between the mean time of the clusters on tracks
82  '''
83  if histo_diff.GetEntries() > min_entries:
84  return histo_diff.GetRMS()
85  else:
86  return np.nan
87 
88 
89 def get_agreament2(histo_eventT0, histo_onTracks, min_entries=100):
90  '''
91  Get the difference between the mean time of the clusters on tracks and the mean eventT0 divided by the RMS of the event T0
92  '''
93  # Get the difference in number of sigmas between the mean eventT0 and the mean time of the clusters on tracks
94  if histo_eventT0.GetEntries() > min_entries and histo_onTracks.GetEntries() > min_entries:
95  return (histo_onTracks.GetMean() - histo_eventT0.GetMean())/histo_eventT0.GetRMS()
96  else:
97  return np.nan
98 
99 
100 def make_roc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True):
101  import root_numpy
102  dist_sgn = root_numpy.hist2array(hist_sgn)
103  dist_bkg = root_numpy.hist2array(hist_bkg)
104  dist_sgn = dist_sgn/dist_sgn.sum()
105  dist_bkg = dist_bkg/dist_bkg.sum()
106  if two_sided:
107  dist_sgn = np.append(dist_sgn, [0])
108  dist_bkg = np.append(dist_bkg, [0])
109  eff_sgn = tuple(reversed([sum(dist_sgn[i:-(1+i)]) for i in range(math.ceil(len(dist_sgn)/2)+1)]))
110  eff_bkg = tuple(reversed([sum(dist_bkg[i:-(1+i)]) for i in range(math.ceil(len(dist_bkg)/2)+1)]))
111  else:
112  if not lower_is_better:
113  dist_sgn = np.array(tuple(reversed(dist_sgn)))
114  dist_bkg = np.array(tuple(reversed(dist_bkg)))
115  eff_sgn = [sum(dist_sgn[:i+1]) for i in range(len(dist_sgn))]
116  eff_bkg = [sum(dist_bkg[:i+1]) for i in range(len(dist_bkg))]
117  return eff_sgn, [1-i for i in eff_bkg]
118 
119 
120 def get_roc_auc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True, min_entries=100):
121  if hist_sgn.GetEntries() > min_entries and hist_bkg.GetEntries() > min_entries:
122 
123  return np.trapz(*reversed(make_roc(
124  hist_sgn, hist_bkg,
125  lower_is_better=lower_is_better,
126  two_sided=two_sided)))
127  else:
128  return np.nan
129 
130 
131 def np2plt_hist(np_hist):
132  return {'x': np_hist[1][0][:-1], 'bins': np_hist[1][0], 'weights': np_hist[0]}
133 
134 
135 def make_combined_plot(pattern, histos, title=None):
136  import root_numpy
137  h_onTracks = root_numpy.hist2array(get_combined(histos['onTracks'], pattern), return_edges=True)
138  h_offTracks = root_numpy.hist2array(get_combined(histos['offTracks'], pattern), return_edges=True)
139  h_eventT0 = root_numpy.hist2array(histos['eventT0'], return_edges=True)
140 
141  # normalise h_eventT0 to have same number of entries as h_onTracks
142  h_eventT0 = (h_eventT0[0]*sum(h_onTracks[0])/sum(h_eventT0[0]), h_eventT0[1])
143 
144  plt.figure()
145  plt.hist(h_onTracks[1][0][:-1], h_onTracks[1][0], weights=h_onTracks[0], histtype='step', label='on tracks')
146  plt.hist(h_offTracks[1][0][:-1], h_offTracks[1][0], weights=h_offTracks[0], histtype='step', label='off tracks')
147  plt.hist(h_eventT0[1][0][:-1], h_eventT0[1][0], weights=h_eventT0[0], histtype='step', label='event T0')
148  plt.legend(loc='upper right')
149  plt.xlabel('cluster time [ns]')
150  if title is not None:
151  plt.title(title)
152  plt.tight_layout()
153 
154 
155 # get labels for all possible sides
156 names_sides = []
157 ladder_of_layer = [7, 10, 12, 16]
158 sensor_on_layer = [2, 3, 4, 5]
159 for layer in range(3, 7):
160  for ladder in range(1, ladder_of_layer[layer-3]+1):
161  for sensor in range(1, sensor_on_layer[layer-3]+1):
162  for side in ['U', 'V']:
163  names_sides.append(f'L{layer}L{ladder}S{sensor}{side}')
164 
165 
166 # grouped sensors integrating phi
167 names_grouped_sides = []
168 for layer in range(3, 7):
169  for sensor in range(1, sensor_on_layer[layer-3]+1):
170  for side in ['U', 'V']:
171  names_grouped_sides.append(f'L{layer}L*S{sensor}{side}')
172 
173 
174 time_algorithms = ['CoG6', 'CoG3', 'ELS3']
175 
176 
177 def get_histos(in_file, algo='CoG3',
178  names_sides=names_sides,
179  base_dirs={algo: f'SVDTimeValidationCollector_{algo}'
180  for algo in time_algorithms}):
181 
182  histos = {}
183 
184  base_dir = base_dirs[algo]
185  iov = in_file.Get(f'{base_dir}/RunRange').getIntervalOfValidity()
186  exp, run = iov.getExperimentLow(), iov.getRunLow()
187 
188  histos['eventT0'] = in_file.Get(get_full_path('hEventT0', exp, run, base_dir))
189 
190  histos_all = {name_side: in_file.Get(get_full_path(f'clsTimeAll__{name_side}', exp, run, base_dir))
191  for name_side in names_sides}
192  histos['onTracks'] = {name_side: in_file.Get(get_full_path(f'clsTimeOnTracks__{name_side}', exp, run, base_dir))
193  for name_side in names_sides}
194  histos['offTracks'] = {key: get_histo_offTracks(histos_all[key], histos['onTracks'][key])
195  for key in histos['onTracks']}
196  histos['diff'] = {name_side: in_file.Get(get_full_path(f'clsDiffTimeOnTracks__{name_side}', exp, run, base_dir))
197  for name_side in names_sides}
198 
199  # replace None with empty histograms
200  for kind in ['onTracks', 'offTracks', 'diff']:
201  for key, value in histos[kind].items():
202  if not isinstance(value, r.TH1):
203  histos[kind][key] = r.TH1F(f'{kind}_{key}_{algo}', f'{kind}_{key}_{algo}', 300, -150, 150)
204 
205  return histos, exp, run