Belle II Software  release-08-01-10
validation_utils.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import sys
12 
13 import fnmatch
14 import math
15 
16 import numpy as np
17 import matplotlib
18 import matplotlib.pyplot as plt
19 
20 import ROOT as r
21 r.PyConfig.IgnoreCommandLineOptions = True
22 
23 matplotlib.use('Agg')
24 plt.style.use("belle2")
25 
26 
27 def progress(count, total):
28  bar_len = 60
29  filled_len = int(round(bar_len * count / total))
30  percents = round(100 * count / total, 1)
31  bar = '=' * filled_len + '-' * (bar_len - filled_len)
32  sys.stdout.write(f'[{bar}] {percents}%\r')
33  sys.stdout.flush()
34 
35 
36 def get_full_path(name, exp, run, base_dir):
37  return f'{base_dir}/{name}/{name}_{exp}.{run}/{name}_1'
38 
39 
40 def get_combined(histos, pattern):
41  '''
42  Sum histograms that match a certain pattern shell-style, e.g.
43  "*" all, "L3*" all layer 3, "*U" all U-sensors
44 
45  Parameters:
46  histos (dict{label : TH1}): dictionary of
47  patern (str): shell-styel matching pattern
48  '''
49 
50  good_keys = [key for key in histos.keys() if fnmatch.fnmatch(key, pattern)]
51 
52  assert len(good_keys) > 0
53 
54  h = histos[good_keys[0]].Clone()
55  h.SetName(h.GetName().split('__')[0]+f'__{pattern}')
56  h.SetTitle(h.GetName().split('__')[0]+f' in {pattern}')
57 
58  for key in good_keys[1:]:
59  h.Add(histos[key])
60 
61  return h
62 
63 
64 def get_histo_offTracks(histo_all, histo_onTracks):
65  '''
66  Substract the histogram with the clusters on tracks from the histogram with all the clusters
67  to get the histogram with the clusters off tracks
68  '''
69  try:
70  histo_offTracks = histo_all.Clone(histo_all.GetName().replace('all', 'offTracks'))
71  histo_offTracks.SetTitle(histo_offTracks.GetTitle().replace('All', 'OffTracks'))
72  histo_offTracks.Add(histo_onTracks, -1)
73  return histo_offTracks
74  except (ReferenceError, TypeError):
75  return None
76 
77 
78 def get_agreement(histo_eventT0, histo_diff, min_entries=100):
79  '''
80  Get the mean of the difference between the mean time of the clusters on tracks and
81  the mean eventT0 divided by the RMS of the event T0
82  '''
83  if histo_eventT0.GetEntries() > min_entries and histo_diff.GetEntries() > min_entries:
84  return histo_diff.GetMean()/histo_eventT0.GetRMS()
85  else:
86  return np.nan
87 
88 
89 def get_precision(histo_diff, min_entries=100):
90  '''
91  Get the RMS of the difference between the mean time of the clusters on tracks
92  '''
93  if histo_diff.GetEntries() > min_entries:
94  return histo_diff.GetRMS()
95  else:
96  return np.nan
97 
98 
99 def get_agreement2(histo_eventT0, histo_onTracks, min_entries=100):
100  '''
101  Get the difference between the mean time of the clusters on tracks and the mean eventT0 divided by the RMS of the event T0
102  '''
103  # Get the difference in number of sigmas between the mean eventT0 and the mean time of the clusters on tracks
104  if histo_eventT0.GetEntries() > min_entries and histo_onTracks.GetEntries() > min_entries:
105  return (histo_onTracks.GetMean() - histo_eventT0.GetMean())/histo_eventT0.GetRMS()
106  else:
107  return np.nan
108 
109 
110 def get_shift_plot(shift_histos, min_entries=100):
111  '''
112  The creates a 2D plot to visualize the shift of mean of the
113  cluster time distribution for each cluster sizes for each sensor group.
114  '''
115  hShiftVal = None
116  binNumber = 1
117  for key, hShift in shift_histos.items():
118  if hShiftVal is None:
119  nxbins = len(shift_histos)
120  nybins = hShift.GetNbinsY()
121  hShiftVal = r.TH2F("hShiftVal", "Cluster Size VS Shift Values in Each Sensor Group",
122  nxbins, 0.5, nxbins + 0.5, nybins, 0.5, nybins + 0.5)
123  hShiftVal.GetZaxis().SetTitle("(Not fitted) Mean of Cluster Time Distribution (in ns)")
124  hShiftVal.GetYaxis().SetTitle("Cluster Size")
125 
126  for ij in range(hShift.GetNbinsY()):
127  hist = hShift.ProjectionX("tmp", ij + 1, ij + 1, "")
128  if hist.GetSumOfWeights() > min_entries:
129  hShiftVal.SetBinContent(binNumber, ij + 1, hist.GetMean())
130 
131  hShiftVal.GetXaxis().SetBinLabel(binNumber, key)
132  binNumber += 1
133 
134  hShiftVal.SetStats(0)
135  hShiftVal.GetXaxis().LabelsOption("V")
136  return hShiftVal
137 
138 
139 def get_shift_agreement(shift_histo, min_entries=100):
140  '''
141  It calculates the mean of cluster-time distribution for each cluster size.
142  Then returns the average of the squared sum of the means.
143  '''
144  mean_values = []
145  if isinstance(shift_histo, r.TH2):
146  for ij in range(shift_histo.GetNbinsY()):
147  hist = shift_histo.ProjectionX("tmp", ij + 1, ij + 1, "")
148  if hist.GetSumOfWeights() > min_entries:
149  hmean = hist.GetMean()
150  mean_values.append(hmean * hmean)
151  if not len(mean_values):
152  return np.nan
153  else:
154  return np.sqrt(np.sum(mean_values)) / len(mean_values)
155 
156 
157 def make_roc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True):
158  from hist_utils import hist2array
159  dist_sgn = hist2array(hist_sgn)
160  dist_bkg = hist2array(hist_bkg)
161  dist_sgn = dist_sgn/dist_sgn.sum()
162  dist_bkg = dist_bkg/dist_bkg.sum()
163  if two_sided:
164  dist_sgn = np.append(dist_sgn, [0])
165  dist_bkg = np.append(dist_bkg, [0])
166  eff_sgn = tuple(reversed([sum(dist_sgn[i:-(1+i)]) for i in range(math.ceil(len(dist_sgn)/2)+1)]))
167  eff_bkg = tuple(reversed([sum(dist_bkg[i:-(1+i)]) for i in range(math.ceil(len(dist_bkg)/2)+1)]))
168  else:
169  if not lower_is_better:
170  dist_sgn = np.array(tuple(reversed(dist_sgn)))
171  dist_bkg = np.array(tuple(reversed(dist_bkg)))
172  eff_sgn = [sum(dist_sgn[:i+1]) for i in range(len(dist_sgn))]
173  eff_bkg = [sum(dist_bkg[:i+1]) for i in range(len(dist_bkg))]
174  return eff_sgn, [1-i for i in eff_bkg]
175 
176 
177 def get_roc_auc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True, min_entries=100):
178  if hist_sgn.GetEntries() > min_entries and hist_bkg.GetEntries() > min_entries:
179 
180  return np.trapz(*reversed(make_roc(
181  hist_sgn, hist_bkg,
182  lower_is_better=lower_is_better,
183  two_sided=two_sided)))
184  else:
185  return np.nan
186 
187 
188 def np2plt_hist(np_hist):
189  return {'x': np_hist[1][0][:-1], 'bins': np_hist[1][0], 'weights': np_hist[0]}
190 
191 
192 def make_combined_plot(pattern, histos, title=None):
193  from hist_utils import hist2array
194  h_onTracks = hist2array(get_combined(histos['onTracks'], pattern), return_edges=True)
195  h_offTracks = hist2array(get_combined(histos['offTracks'], pattern), return_edges=True)
196  h_eventT0 = hist2array(histos['eventT0'], return_edges=True)
197 
198  # normalise h_eventT0 to have same number of entries as h_onTracks
199  h_eventT0 = (h_eventT0[0]*sum(h_onTracks[0])/sum(h_eventT0[0]), h_eventT0[1])
200 
201  plt.figure()
202  plt.hist(h_onTracks[1][0][:-1], h_onTracks[1][0], weights=h_onTracks[0], histtype='step', label='on tracks')
203  plt.hist(h_offTracks[1][0][:-1], h_offTracks[1][0], weights=h_offTracks[0], histtype='step', label='off tracks')
204  plt.hist(h_eventT0[1][0][:-1], h_eventT0[1][0], weights=h_eventT0[0], histtype='step', label='event T0')
205  plt.legend(loc='upper right')
206  plt.xlabel('cluster time [ns]')
207  if title is not None:
208  plt.title(title)
209  plt.tight_layout()
210 
211 
212 # get labels for all possible sides
213 names_sides = []
214 ladder_of_layer = [7, 10, 12, 16]
215 sensor_on_layer = [2, 3, 4, 5]
216 for layer in range(3, 7):
217  for ladder in range(1, ladder_of_layer[layer-3]+1):
218  for sensor in range(1, sensor_on_layer[layer-3]+1):
219  for side in ['U', 'V']:
220  names_sides.append(f'L{layer}L{ladder}S{sensor}{side}')
221 
222 
223 # grouped sensors integrating phi
224 names_grouped_sides = []
225 for layer in range(3, 7):
226  for sensor in range(1, sensor_on_layer[layer-3]+1):
227  for side in ['U', 'V']:
228  names_grouped_sides.append(f'L{layer}L*S{sensor}{side}')
229 
230 
231 time_algorithms = ['CoG6', 'CoG3', 'ELS3']
232 
233 
234 def get_histos(CollectorHistograms):
235 
236  histos = {}
237 
238  histos['eventT0'] = CollectorHistograms['hEventT0'][0]
239 
240  histos_all = {}
241  histos['onTracks'] = {}
242  histos['diff'] = {}
243  histos['timeShifter'] = {}
244 
245  __hClsTimeOnTracks__ = CollectorHistograms['hClsTimeOnTracks'][0]
246  __hClsTimeAll__ = CollectorHistograms['hClsTimeAll'][0]
247  __hClsDiffTimeOnTracks__ = CollectorHistograms['hClsDiffTimeOnTracks'][0]
248  __hClusterSizeVsTimeResidual__ = CollectorHistograms['hClusterSizeVsTimeResidual'][0]
249  __hBinToSensorMap__ = CollectorHistograms['hBinToSensorMap'][0]
250 
251  for name_side in names_sides:
252  sensorBin = __hBinToSensorMap__.GetXaxis().FindBin(name_side)
253 
254  hClsTimeOnTracks = __hClsTimeOnTracks__.ProjectionX("hClsTimeOnTracks_tmp", sensorBin, sensorBin)
255  hClsTimeAll = __hClsTimeAll__.ProjectionX("hClsTimeAll_tmp", sensorBin, sensorBin)
256  hClsDiffTimeOnTracks = __hClsDiffTimeOnTracks__.ProjectionX("hClsDiffTimeOnTracks_tmp", sensorBin, sensorBin)
257 
258  __hClusterSizeVsTimeResidual__.GetZaxis().SetRange(sensorBin, sensorBin)
259  hClusterSizeVsTimeResidual = __hClusterSizeVsTimeResidual__.Project3D("yxe")
260 
261  hClsTimeOnTracks.SetNameTitle(f"clsTimeOnTracks__{name_side}", f"clsTimeOnTracks__{name_side}")
262  hClsTimeAll.SetNameTitle(f"clsTimeAll__{name_side}", f"clsTimeAll__{name_side}")
263  hClsDiffTimeOnTracks.SetNameTitle(f"clsDiffTimeOnTracks__{name_side}", f"clsDiffTimeOnTracks__{name_side}")
264  hClusterSizeVsTimeResidual.SetNameTitle(f"clusterSizeVsTimeResidual__{name_side}",
265  f"Cluster Size vs Time Residual in {name_side}")
266 
267  hClsTimeOnTracks.SetDirectory(0)
268  hClsTimeAll.SetDirectory(0)
269  hClsDiffTimeOnTracks.SetDirectory(0)
270  hClusterSizeVsTimeResidual.SetDirectory(0)
271 
272  histos['onTracks'][name_side] = hClsTimeOnTracks
273  histos_all[name_side] = hClsTimeAll
274  histos['diff'][name_side] = hClsDiffTimeOnTracks
275  histos['timeShifter'][name_side] = hClusterSizeVsTimeResidual
276 
277  histos['offTracks'] = {key: get_histo_offTracks(histos_all[key], histos['onTracks'][key])
278  for key in histos['onTracks']}
279 
280  # replace None with empty histograms
281  for kind in ['onTracks', 'offTracks', 'diff']:
282  for key, value in histos[kind].items():
283  if not isinstance(value, r.TH1):
284  histos[kind][key] = r.TH1F(f'{kind}_{key}', f'{kind}_{key}', 300, -150, 150)
285 
286  return histos
287 
288 
289 def get_merged_collector_histograms(files):
290 
291  CollectorHistograms = {}
292 
293  num_files = len(files)
294  print(f'Looping over {num_files} files')
295  progress(0, num_files)
296  for count, in_file_name in enumerate(files):
297 
298  in_file = r.TFile(str(in_file_name))
299 
300  for algo in time_algorithms:
301 
302  base_dir = f'SVDTimeValidationCollector_{algo}'
303  iov = in_file.Get(f'{base_dir}/RunRange').getIntervalOfValidity()
304  exp, run = iov.getExperimentLow(), iov.getRunLow()
305 
306  if algo not in CollectorHistograms:
307  CollectorHistograms[algo] = {}
308  if exp not in CollectorHistograms[algo]:
309  CollectorHistograms[algo][exp] = {}
310  if run not in CollectorHistograms[algo][exp]:
311  CollectorHistograms[algo][exp][run] = {"hEventT0": [],
312  "hClsTimeOnTracks": [],
313  "hClsTimeAll": [],
314  "hClsDiffTimeOnTracks": [],
315  "hClusterSizeVsTimeResidual": [],
316  "hBinToSensorMap": []}
317 
318  __hEventT0__ = in_file.Get(get_full_path('hEventT0', exp, run, base_dir))
319  __hClsTimeOnTracks__ = in_file.Get(get_full_path('__hClsTimeOnTracks__',
320  exp, run, base_dir))
321  __hClsTimeAll__ = in_file.Get(get_full_path('__hClsTimeAll__',
322  exp, run, base_dir))
323  __hClsDiffTimeOnTracks__ = in_file.Get(get_full_path('__hClsDiffTimeOnTracks__',
324  exp, run, base_dir))
325  __hClusterSizeVsTimeResidual__ = in_file.Get(get_full_path('__hClusterSizeVsTimeResidual__',
326  exp, run, base_dir))
327  __hBinToSensorMap__ = in_file.Get(get_full_path('__hBinToSensorMap__',
328  exp, run, base_dir))
329  __hEventT0__.SetDirectory(0)
330  __hClsTimeOnTracks__.SetDirectory(0)
331  __hClsTimeAll__.SetDirectory(0)
332  __hClsDiffTimeOnTracks__.SetDirectory(0)
333  __hClusterSizeVsTimeResidual__.SetDirectory(0)
334  __hBinToSensorMap__.SetDirectory(0)
335  CollectorHistograms[algo][exp][run]["hEventT0"].append(__hEventT0__)
336  CollectorHistograms[algo][exp][run]["hClsTimeOnTracks"].append(__hClsTimeOnTracks__)
337  CollectorHistograms[algo][exp][run]["hClsTimeAll"].append(__hClsTimeAll__)
338  CollectorHistograms[algo][exp][run]["hClsDiffTimeOnTracks"].append(__hClsDiffTimeOnTracks__)
339  CollectorHistograms[algo][exp][run]["hClusterSizeVsTimeResidual"].append(__hClusterSizeVsTimeResidual__)
340  CollectorHistograms[algo][exp][run]["hBinToSensorMap"].append(__hBinToSensorMap__)
341 
342  in_file.Close()
343 
344  # Show the progress
345  progress(count+1, num_files)
346 
347  print()
348 
349  for algo in CollectorHistograms:
350  for exp in CollectorHistograms[algo]:
351  for run in CollectorHistograms[algo][exp]:
352  for key in CollectorHistograms[algo][exp][run]:
353  for hist in CollectorHistograms[algo][exp][run][key][1:]:
354  CollectorHistograms[algo][exp][run][key][0].Add(hist)
355 
356  return CollectorHistograms