Belle II Software development
validation_utils.py
1#!/usr/bin/env python3
2
3
10
11import sys
12
13import fnmatch
14import math
15
16import numpy as np
17import matplotlib
18import matplotlib.pyplot as plt
19
20import ROOT as r
21r.PyConfig.IgnoreCommandLineOptions = True
22
23matplotlib.use('Agg')
24plt.style.use("belle2")
25
26
27def progress(count, total):
28 bar_len = 60
29 filled_len = int(round(bar_len * count / total))
30 percents = round(100 * count / total, 1)
31 bar = '=' * filled_len + '-' * (bar_len - filled_len)
32 sys.stdout.write(f'[{bar}] {percents}%\r')
33 sys.stdout.flush()
34
35
36def get_full_path(name, exp, run, base_dir):
37 return f'{base_dir}/{name}/{name}_{exp}.{run}/{name}_1'
38
39
40def get_combined(histos, pattern):
41 '''
42 Sum histograms that match a certain pattern shell-style, e.g.
43 "*" all, "L3*" all layer 3, "*U" all U-sensors
44
45 Parameters:
46 histos (dict{label : TH1}): dictionary of
47 patern (str): shell-styel matching pattern
48 '''
49
50 good_keys = [key for key in histos.keys() if fnmatch.fnmatch(key, pattern)]
51
52 assert len(good_keys) > 0
53
54 h = histos[good_keys[0]].Clone()
55 h.SetName(h.GetName().split('__')[0]+f'__{pattern}')
56 h.SetTitle(h.GetName().split('__')[0]+f' in {pattern}')
57
58 for key in good_keys[1:]:
59 h.Add(histos[key])
60
61 return h
62
63
64def get_histo_offTracks(histo_all, histo_onTracks):
65 '''
66 Substract the histogram with the clusters on tracks from the histogram with all the clusters
67 to get the histogram with the clusters off tracks
68 '''
69 try:
70 histo_offTracks = histo_all.Clone(histo_all.GetName().replace('all', 'offTracks'))
71 histo_offTracks.SetTitle(histo_offTracks.GetTitle().replace('All', 'OffTracks'))
72 histo_offTracks.Add(histo_onTracks, -1)
73 return histo_offTracks
74 except (ReferenceError, TypeError):
75 return None
76
77
78def get_agreement(histo_eventT0, histo_diff, min_entries=100):
79 '''
80 Get the mean of the difference between the mean time of the clusters on tracks and
81 the mean eventT0 divided by the RMS of the event T0
82 '''
83 if histo_eventT0.GetEntries() > min_entries and histo_diff.GetEntries() > min_entries:
84 return histo_diff.GetMean()/histo_eventT0.GetRMS()
85 else:
86 return np.nan
87
88
89def get_precision(histo_diff, min_entries=100):
90 '''
91 Get the RMS of the difference between the mean time of the clusters on tracks
92 '''
93 if histo_diff.GetEntries() > min_entries:
94 return histo_diff.GetRMS()
95 else:
96 return np.nan
97
98
99def get_agreement2(histo_eventT0, histo_onTracks, min_entries=100):
100 '''
101 Get the difference between the mean time of the clusters on tracks and the mean eventT0 divided by the RMS of the event T0
102 '''
103 # Get the difference in number of sigmas between the mean eventT0 and the mean time of the clusters on tracks
104 if histo_eventT0.GetEntries() > min_entries and histo_onTracks.GetEntries() > min_entries:
105 return (histo_onTracks.GetMean() - histo_eventT0.GetMean())/histo_eventT0.GetRMS()
106 else:
107 return np.nan
108
109
110def get_shift_plot(shift_histos, min_entries=100):
111 '''
112 The creates a 2D plot to visualize the shift of mean of the
113 cluster time distribution for each cluster sizes for each sensor group.
114 '''
115 hShiftVal = None
116 binNumber = 1
117 for key, hShift in shift_histos.items():
118 if hShiftVal is None:
119 nxbins = len(shift_histos)
120 nybins = hShift.GetNbinsY()
121 hShiftVal = r.TH2F("hShiftVal", "Cluster Size VS Shift Values in Each Sensor Group",
122 nxbins, 0.5, nxbins + 0.5, nybins, 0.5, nybins + 0.5)
123 hShiftVal.GetZaxis().SetTitle("(Not fitted) Mean of Cluster Time Distribution (in ns)")
124 hShiftVal.GetYaxis().SetTitle("Cluster Size")
125
126 for ij in range(hShift.GetNbinsY()):
127 hist = hShift.ProjectionX("tmp", ij + 1, ij + 1, "")
128 if hist.GetSumOfWeights() > min_entries:
129 hShiftVal.SetBinContent(binNumber, ij + 1, hist.GetMean())
130
131 hShiftVal.GetXaxis().SetBinLabel(binNumber, key)
132 binNumber += 1
133
134 hShiftVal.SetStats(0)
135 hShiftVal.GetXaxis().LabelsOption("V")
136 return hShiftVal
137
138
139def get_shift_agreement(shift_histo, min_entries=100):
140 '''
141 It calculates the mean of cluster-time distribution for each cluster size.
142 Then returns the average of the squared sum of the means.
143 '''
144 mean_values = []
145 if isinstance(shift_histo, r.TH2):
146 for ij in range(shift_histo.GetNbinsY()):
147 hist = shift_histo.ProjectionX("tmp", ij + 1, ij + 1, "")
148 if hist.GetSumOfWeights() > min_entries:
149 hmean = hist.GetMean()
150 mean_values.append(hmean * hmean)
151 if not len(mean_values):
152 return np.nan
153 else:
154 return np.sqrt(np.sum(mean_values)) / len(mean_values)
155
156
157def make_roc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True):
158 from hist_utils import hist2array
159 dist_sgn = hist2array(hist_sgn)
160 dist_bkg = hist2array(hist_bkg)
161 dist_sgn = dist_sgn/dist_sgn.sum()
162 dist_bkg = dist_bkg/dist_bkg.sum()
163 if two_sided:
164 dist_sgn = np.append(dist_sgn, [0])
165 dist_bkg = np.append(dist_bkg, [0])
166 eff_sgn = tuple(reversed([sum(dist_sgn[i:-(1+i)]) for i in range(math.ceil(len(dist_sgn)/2)+1)]))
167 eff_bkg = tuple(reversed([sum(dist_bkg[i:-(1+i)]) for i in range(math.ceil(len(dist_bkg)/2)+1)]))
168 else:
169 if not lower_is_better:
170 dist_sgn = np.array(tuple(reversed(dist_sgn)))
171 dist_bkg = np.array(tuple(reversed(dist_bkg)))
172 eff_sgn = [sum(dist_sgn[:i+1]) for i in range(len(dist_sgn))]
173 eff_bkg = [sum(dist_bkg[:i+1]) for i in range(len(dist_bkg))]
174 return eff_sgn, [1-i for i in eff_bkg]
175
176
177def get_roc_auc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True, min_entries=100):
178 if hist_sgn.GetEntries() > min_entries and hist_bkg.GetEntries() > min_entries:
179
180 return np.trapz(*reversed(make_roc(
181 hist_sgn, hist_bkg,
182 lower_is_better=lower_is_better,
183 two_sided=two_sided)))
184 else:
185 return np.nan
186
187
188def np2plt_hist(np_hist):
189 return {'x': np_hist[1][0][:-1], 'bins': np_hist[1][0], 'weights': np_hist[0]}
190
191
192def make_combined_plot(pattern, histos, title=None):
193 from hist_utils import hist2array
194 h_onTracks = hist2array(get_combined(histos['onTracks'], pattern), return_edges=True)
195 h_offTracks = hist2array(get_combined(histos['offTracks'], pattern), return_edges=True)
196 h_eventT0 = hist2array(histos['eventT0'], return_edges=True)
197
198 # normalise h_eventT0 to have same number of entries as h_onTracks
199 h_eventT0 = (h_eventT0[0]*sum(h_onTracks[0])/sum(h_eventT0[0]), h_eventT0[1])
200
201 plt.figure()
202 plt.hist(h_onTracks[1][0][:-1], h_onTracks[1][0], weights=h_onTracks[0], histtype='step', label='on tracks')
203 plt.hist(h_offTracks[1][0][:-1], h_offTracks[1][0], weights=h_offTracks[0], histtype='step', label='off tracks')
204 plt.hist(h_eventT0[1][0][:-1], h_eventT0[1][0], weights=h_eventT0[0], histtype='step', label='event T0')
205 plt.legend(loc='upper right')
206 plt.xlabel('cluster time [ns]')
207 if title is not None:
208 plt.title(title)
209 plt.tight_layout()
210
211
212# get labels for all possible sides
213names_sides = []
214ladder_of_layer = [7, 10, 12, 16]
215sensor_on_layer = [2, 3, 4, 5]
216for layer in range(3, 7):
217 for ladder in range(1, ladder_of_layer[layer-3]+1):
218 for sensor in range(1, sensor_on_layer[layer-3]+1):
219 for side in ['U', 'V']:
220 names_sides.append(f'L{layer}L{ladder}S{sensor}{side}')
221
222
223# grouped sensors integrating phi
224names_grouped_sides = []
225for layer in range(3, 7):
226 for sensor in range(1, sensor_on_layer[layer-3]+1):
227 for side in ['U', 'V']:
228 names_grouped_sides.append(f'L{layer}L*S{sensor}{side}')
229
230
231time_algorithms = ['CoG6', 'CoG3', 'ELS3']
232
233
234def get_histos(CollectorHistograms):
235
236 histos = {}
237
238 histos['eventT0'] = CollectorHistograms['hEventT0'][0]
239
240 histos_all = {}
241 histos['onTracks'] = {}
242 histos['diff'] = {}
243 histos['timeShifter'] = {}
244
245 __hClsTimeOnTracks__ = CollectorHistograms['hClsTimeOnTracks'][0]
246 __hClsTimeAll__ = CollectorHistograms['hClsTimeAll'][0]
247 __hClsDiffTimeOnTracks__ = CollectorHistograms['hClsDiffTimeOnTracks'][0]
248 __hClusterSizeVsTimeResidual__ = CollectorHistograms['hClusterSizeVsTimeResidual'][0]
249 __hBinToSensorMap__ = CollectorHistograms['hBinToSensorMap'][0]
250
251 for name_side in names_sides:
252 sensorBin = __hBinToSensorMap__.GetXaxis().FindBin(name_side)
253
254 hClsTimeOnTracks = __hClsTimeOnTracks__.ProjectionX("hClsTimeOnTracks_tmp", sensorBin, sensorBin)
255 hClsTimeAll = __hClsTimeAll__.ProjectionX("hClsTimeAll_tmp", sensorBin, sensorBin)
256 hClsDiffTimeOnTracks = __hClsDiffTimeOnTracks__.ProjectionX("hClsDiffTimeOnTracks_tmp", sensorBin, sensorBin)
257
258 __hClusterSizeVsTimeResidual__.GetZaxis().SetRange(sensorBin, sensorBin)
259 hClusterSizeVsTimeResidual = __hClusterSizeVsTimeResidual__.Project3D("yxe")
260
261 hClsTimeOnTracks.SetNameTitle(f"clsTimeOnTracks__{name_side}", f"clsTimeOnTracks__{name_side}")
262 hClsTimeAll.SetNameTitle(f"clsTimeAll__{name_side}", f"clsTimeAll__{name_side}")
263 hClsDiffTimeOnTracks.SetNameTitle(f"clsDiffTimeOnTracks__{name_side}", f"clsDiffTimeOnTracks__{name_side}")
264 hClusterSizeVsTimeResidual.SetNameTitle(f"clusterSizeVsTimeResidual__{name_side}",
265 f"Cluster Size vs Time Residual in {name_side}")
266
267 hClsTimeOnTracks.SetDirectory(0)
268 hClsTimeAll.SetDirectory(0)
269 hClsDiffTimeOnTracks.SetDirectory(0)
270 hClusterSizeVsTimeResidual.SetDirectory(0)
271
272 histos['onTracks'][name_side] = hClsTimeOnTracks
273 histos_all[name_side] = hClsTimeAll
274 histos['diff'][name_side] = hClsDiffTimeOnTracks
275 histos['timeShifter'][name_side] = hClusterSizeVsTimeResidual
276
277 histos['offTracks'] = {key: get_histo_offTracks(histos_all[key], histos['onTracks'][key])
278 for key in histos['onTracks']}
279
280 # replace None with empty histograms
281 for kind in ['onTracks', 'offTracks', 'diff']:
282 for key, value in histos[kind].items():
283 if not isinstance(value, r.TH1):
284 histos[kind][key] = r.TH1F(f'{kind}_{key}', f'{kind}_{key}', 300, -150, 150)
285
286 return histos
287
288
289def get_merged_collector_histograms(files):
290
291 CollectorHistograms = {}
292
293 num_files = len(files)
294 print(f'Looping over {num_files} files')
295 progress(0, num_files)
296 for count, in_file_name in enumerate(files):
297
298 in_file = r.TFile(str(in_file_name))
299
300 for algo in time_algorithms:
301
302 base_dir = f'SVDTimeValidationCollector_{algo}'
303 iov = in_file.Get(f'{base_dir}/RunRange').getIntervalOfValidity()
304 exp, run = iov.getExperimentLow(), iov.getRunLow()
305
306 if algo not in CollectorHistograms:
307 CollectorHistograms[algo] = {}
308 if exp not in CollectorHistograms[algo]:
309 CollectorHistograms[algo][exp] = {}
310 if run not in CollectorHistograms[algo][exp]:
311 CollectorHistograms[algo][exp][run] = {"hEventT0": [],
312 "hClsTimeOnTracks": [],
313 "hClsTimeAll": [],
314 "hClsDiffTimeOnTracks": [],
315 "hClusterSizeVsTimeResidual": [],
316 "hBinToSensorMap": []}
317
318 __hEventT0__ = in_file.Get(get_full_path('hEventT0', exp, run, base_dir))
319 __hClsTimeOnTracks__ = in_file.Get(get_full_path('__hClsTimeOnTracks__',
320 exp, run, base_dir))
321 __hClsTimeAll__ = in_file.Get(get_full_path('__hClsTimeAll__',
322 exp, run, base_dir))
323 __hClsDiffTimeOnTracks__ = in_file.Get(get_full_path('__hClsDiffTimeOnTracks__',
324 exp, run, base_dir))
325 __hClusterSizeVsTimeResidual__ = in_file.Get(get_full_path('__hClusterSizeVsTimeResidual__',
326 exp, run, base_dir))
327 __hBinToSensorMap__ = in_file.Get(get_full_path('__hBinToSensorMap__',
328 exp, run, base_dir))
329 __hEventT0__.SetDirectory(0)
330 __hClsTimeOnTracks__.SetDirectory(0)
331 __hClsTimeAll__.SetDirectory(0)
332 __hClsDiffTimeOnTracks__.SetDirectory(0)
333 __hClusterSizeVsTimeResidual__.SetDirectory(0)
334 __hBinToSensorMap__.SetDirectory(0)
335 CollectorHistograms[algo][exp][run]["hEventT0"].append(__hEventT0__)
336 CollectorHistograms[algo][exp][run]["hClsTimeOnTracks"].append(__hClsTimeOnTracks__)
337 CollectorHistograms[algo][exp][run]["hClsTimeAll"].append(__hClsTimeAll__)
338 CollectorHistograms[algo][exp][run]["hClsDiffTimeOnTracks"].append(__hClsDiffTimeOnTracks__)
339 CollectorHistograms[algo][exp][run]["hClusterSizeVsTimeResidual"].append(__hClusterSizeVsTimeResidual__)
340 CollectorHistograms[algo][exp][run]["hBinToSensorMap"].append(__hBinToSensorMap__)
341
342 in_file.Close()
343
344 # Show the progress
345 progress(count+1, num_files)
346
347 print()
348
349 for algo in CollectorHistograms:
350 for exp in CollectorHistograms[algo]:
351 for run in CollectorHistograms[algo][exp]:
352 for key in CollectorHistograms[algo][exp][run]:
353 for hist in CollectorHistograms[algo][exp][run][key][1:]:
354 CollectorHistograms[algo][exp][run][key][0].Add(hist)
355
356 return CollectorHistograms