Belle II Software prerelease-11-00-00a
validation_utils.py
1#!/usr/bin/env python3
2
3
10
11import sys
12
13import fnmatch
14import math
15
16import numpy as np
17import matplotlib
18import matplotlib.pyplot as plt
19
20import ROOT as r
21r.PyConfig.IgnoreCommandLineOptions = True
22
23matplotlib.use('Agg')
24plt.style.use("belle2")
25
26
27def progress(count, total):
28 bar_len = 60
29 filled_len = int(round(bar_len * count / total))
30 percents = round(100 * count / total, 1)
31 bar = '=' * filled_len + '-' * (bar_len - filled_len)
32 sys.stdout.write(f'[{bar}] {percents}%\r')
33 sys.stdout.flush()
34
35
36def get_full_path(name, exp, run, base_dir):
37 return f'{base_dir}/{name}/{name}_{exp}.{run}/{name}_1'
38
39
40def get_combined(histos, pattern):
41 '''
42 Sum histograms that match a certain pattern shell-style, e.g.
43 "*" all, "L3*" all layer 3, "*U" all U-sensors
44
45 Parameters:
46 histos (dict{label : TH1}): dictionary of
47 pattern (str): shell-styel matching pattern
48 '''
49
50 good_keys = [key for key in histos.keys() if fnmatch.fnmatch(key, pattern)]
51
52 assert len(good_keys) > 0
53
54 h = histos[good_keys[0]].Clone()
55 h.SetName(h.GetName().split('__')[0]+f'__{pattern}')
56 h.SetTitle(h.GetName().split('__')[0]+f' in {pattern}')
57
58 for key in good_keys[1:]:
59 h.Add(histos[key])
60
61 return h
62
63
64def get_histo_offTracks(histo_all, histo_onTracks):
65 '''
66 Subtract the histogram with the clusters on tracks from the histogram with all the clusters
67 to get the histogram with the clusters off tracks
68 '''
69 try:
70 histo_offTracks = histo_all.Clone(histo_all.GetName().replace('all', 'offTracks'))
71 histo_offTracks.SetTitle(histo_offTracks.GetTitle().replace('All', 'OffTracks'))
72 histo_offTracks.Add(histo_onTracks, -1)
73 return histo_offTracks
74 except (ReferenceError, TypeError):
75 return None
76
77
78def get_agreement(histo_eventT0, histo_diff, min_entries=100):
79 '''
80 Get the mean of the difference between the mean time of the clusters on tracks and
81 the mean eventT0 divided by the RMS of the event T0
82 '''
83 if histo_eventT0.GetEntries() > min_entries and histo_diff.GetEntries() > min_entries:
84 return histo_diff.GetMean()/histo_eventT0.GetRMS()
85 else:
86 return np.nan
87
88
89def get_difference(histo1, histo2, min_entries=100):
90 '''
91 Get the difference between mean of two histograms, e.g. between CDC and SVD event T0
92 Not sure yet i need this, maybe get_agreement() is enough.
93 '''
94 if histo1.GetEntries() > min_entries and histo2.GetEntries() > min_entries:
95 return histo1.GetMean() - histo2.GetMean()
96 else:
97 return np.nan
98
99
100def get_precision(histo_diff, min_entries=100):
101 '''
102 Get the RMS of the difference between the mean time of the clusters on tracks
103 '''
104 if histo_diff.GetEntries() > min_entries:
105 return histo_diff.GetRMS()
106 else:
107 return np.nan
108
109
110def get_agreement2(histo_eventT0, histo_onTracks, min_entries=100):
111 '''
112 Get the difference between the mean time of the clusters on tracks and the mean eventT0 divided by the RMS of the event T0
113 '''
114 # Get the difference in number of sigmas between the mean eventT0 and the mean time of the clusters on tracks
115 if histo_eventT0.GetEntries() > min_entries and histo_onTracks.GetEntries() > min_entries:
116 return (histo_onTracks.GetMean() - histo_eventT0.GetMean())/histo_eventT0.GetRMS()
117 else:
118 return np.nan
119
120
121def get_shift_plot(shift_histos, min_entries=100):
122 '''
123 The creates a 2D plot to visualize the shift of mean of the
124 cluster time distribution for each cluster sizes for each sensor group.
125 '''
126 hShiftVal = None
127 binNumber = 1
128 for key, hShift in shift_histos.items():
129 if hShiftVal is None:
130 nxbins = len(shift_histos)
131 nybins = hShift.GetNbinsY()
132 hShiftVal = r.TH2F("hShiftVal", "Cluster Size VS Shift Values in Each Sensor Group",
133 nxbins, 0.5, nxbins + 0.5, nybins, 0.5, nybins + 0.5)
134 hShiftVal.GetZaxis().SetTitle("(Not fitted) Mean of Cluster Time Distribution (in ns)")
135 hShiftVal.GetYaxis().SetTitle("Cluster Size")
136
137 for ij in range(hShift.GetNbinsY()):
138 hist = hShift.ProjectionX("tmp", ij + 1, ij + 1, "")
139 if hist.GetSumOfWeights() > min_entries:
140 hShiftVal.SetBinContent(binNumber, ij + 1, hist.GetMean())
141
142 hShiftVal.GetXaxis().SetBinLabel(binNumber, key)
143 binNumber += 1
144
145 hShiftVal.SetStats(0)
146 hShiftVal.GetXaxis().LabelsOption("V")
147 return hShiftVal
148
149
150def get_shift_agreement(shift_histo, min_entries=100):
151 '''
152 It calculates the mean of cluster-time distribution for each cluster size.
153 Then returns the average of the squared sum of the means.
154 '''
155 mean_values = []
156 if isinstance(shift_histo, r.TH2):
157 for ij in range(shift_histo.GetNbinsY()):
158 hist = shift_histo.ProjectionX("tmp", ij + 1, ij + 1, "")
159 if hist.GetSumOfWeights() > min_entries:
160 hmean = hist.GetMean()
161 mean_values.append(hmean * hmean)
162 if not len(mean_values):
163 return np.nan
164 else:
165 return np.sqrt(np.sum(mean_values)) / len(mean_values)
166
167
168def make_roc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True):
169 from hist_utils import hist2array
170 dist_sgn = hist2array(hist_sgn)
171 dist_bkg = hist2array(hist_bkg)
172 dist_sgn = dist_sgn/dist_sgn.sum()
173 dist_bkg = dist_bkg/dist_bkg.sum()
174 if two_sided:
175 dist_sgn = np.append(dist_sgn, [0])
176 dist_bkg = np.append(dist_bkg, [0])
177 eff_sgn = tuple(reversed([sum(dist_sgn[i:-(1+i)]) for i in range(math.ceil(len(dist_sgn)/2)+1)]))
178 eff_bkg = tuple(reversed([sum(dist_bkg[i:-(1+i)]) for i in range(math.ceil(len(dist_bkg)/2)+1)]))
179 else:
180 if not lower_is_better:
181 dist_sgn = np.array(tuple(reversed(dist_sgn)))
182 dist_bkg = np.array(tuple(reversed(dist_bkg)))
183 eff_sgn = [sum(dist_sgn[:i+1]) for i in range(len(dist_sgn))]
184 eff_bkg = [sum(dist_bkg[:i+1]) for i in range(len(dist_bkg))]
185 return eff_sgn, [1-i for i in eff_bkg]
186
187
188def get_roc_auc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True, min_entries=100):
189 if hist_sgn.GetEntries() > min_entries and hist_bkg.GetEntries() > min_entries:
190
191 return np.trapz(*reversed(make_roc(
192 hist_sgn, hist_bkg,
193 lower_is_better=lower_is_better,
194 two_sided=two_sided)))
195 else:
196 return np.nan
197
198
199def np2plt_hist(np_hist):
200 return {'x': np_hist[1][0][:-1], 'bins': np_hist[1][0], 'weights': np_hist[0]}
201
202
203def make_combined_plot(pattern, histos, title=None):
204 from hist_utils import hist2array
205 h_onTracks = hist2array(get_combined(histos['onTracks'], pattern), return_edges=True)
206 h_offTracks = hist2array(get_combined(histos['offTracks'], pattern), return_edges=True)
207 h_eventT0 = hist2array(histos['eventT0'], return_edges=True)
208
209 # normalise h_eventT0 to have same number of entries as h_onTracks
210 h_eventT0 = (h_eventT0[0]*sum(h_onTracks[0])/sum(h_eventT0[0]), h_eventT0[1])
211 plt.figure()
212 plt.hist(h_onTracks[1][0][:-1], h_onTracks[1][0], weights=h_onTracks[0], histtype='step', label='on tracks')
213 plt.hist(h_offTracks[1][0][:-1], h_offTracks[1][0], weights=h_offTracks[0], histtype='step', label='off tracks')
214 plt.hist(h_eventT0[1][0][:-1], h_eventT0[1][0], weights=h_eventT0[0], histtype='step', label='event T0')
215 plt.legend(loc='upper right')
216 plt.xlabel('cluster time [ns]')
217 if title is not None:
218 plt.title(title)
219 plt.tight_layout()
220
221
222# get labels for all possible sides
223names_sides = []
224ladder_of_layer = [7, 10, 12, 16]
225sensor_on_layer = [2, 3, 4, 5]
226for layer in range(3, 7):
227 for ladder in range(1, ladder_of_layer[layer-3]+1):
228 for sensor in range(1, sensor_on_layer[layer-3]+1):
229 for side in ['U', 'V']:
230 names_sides.append(f'L{layer}L{ladder}S{sensor}{side}')
231names_layer_sides = [f'L{layer}S{side}' for layer in range(3, 7) for side in ['U', 'V']]
232
233# grouped sensors integrating phi
234names_grouped_sides = []
235for layer in range(3, 7):
236 for sensor in range(1, sensor_on_layer[layer-3]+1):
237 for side in ['U', 'V']:
238 names_grouped_sides.append(f'L{layer}L*S{sensor}{side}')
239
240
241time_algorithms = ['CoG6', 'CoG3', 'ELS3']
242
243
244def get_histos(CollectorHistograms):
245
246 histos = {}
247
248 for det in ['CDC', 'SVD']:
249 try:
250 histos[f'{det}eventT0'] = CollectorHistograms[f'hEventT0From{det}']
251 except KeyError:
252 print(f'No eventT0 histogram for {det}, creating empty histogram')
253 #  empty histos
254 histos[f'{det}eventT0'] = r.TH1F(f'{det}eventT0', f'{det}eventT0', 300, -150, 150)
255 histos['eventT0'] = CollectorHistograms['hEventT0']
256
257 histos_all = {}
258 histos['onTracks'] = {}
259 histos['diff'] = {}
260 histos['timeShifter'] = {}
261 histos['absoluteShift'] = {}
262
263 __hClsTimeOnTracks__ = CollectorHistograms['hClsTimeOnTracks']
264 __hClsTimeAll__ = CollectorHistograms['hClsTimeAll']
265 __hClsDiffTimeOnTracks__ = CollectorHistograms['hClsDiffTimeOnTracks']
266 __hClusterSizeVsTimeResidual__ = CollectorHistograms['hClusterSizeVsTimeResidual']
267 __hBinToSensorMap__ = CollectorHistograms['hBinToSensorMap']
268 __hAbsoluteShiftValues__ = CollectorHistograms['hAbsoluteShiftValues']
269
270 for name_side in names_sides:
271 sensorBin = __hBinToSensorMap__.GetXaxis().FindBin(name_side)
272
273 hClsTimeOnTracks = __hClsTimeOnTracks__.ProjectionX("hClsTimeOnTracks_tmp", sensorBin, sensorBin)
274 hClsTimeAll = __hClsTimeAll__.ProjectionX("hClsTimeAll_tmp", sensorBin, sensorBin)
275 hClsDiffTimeOnTracks = __hClsDiffTimeOnTracks__.ProjectionX("hClsDiffTimeOnTracks_tmp", sensorBin, sensorBin)
276
277 __hClusterSizeVsTimeResidual__.GetZaxis().SetRange(sensorBin, sensorBin)
278 hClusterSizeVsTimeResidual = __hClusterSizeVsTimeResidual__.Project3D("yxe")
279
280 hClsTimeOnTracks.SetNameTitle(f"clsTimeOnTracks__{name_side}", f"clsTimeOnTracks__{name_side}")
281 hClsTimeAll.SetNameTitle(f"clsTimeAll__{name_side}", f"clsTimeAll__{name_side}")
282 hClsDiffTimeOnTracks.SetNameTitle(f"clsDiffTimeOnTracks__{name_side}", f"clsDiffTimeOnTracks__{name_side}")
283 hClusterSizeVsTimeResidual.SetNameTitle(f"clusterSizeVsTimeResidual__{name_side}",
284 f"Cluster Size vs Time Residual in {name_side}")
285
286 hClsTimeOnTracks.SetDirectory(0)
287 hClsTimeAll.SetDirectory(0)
288 hClsDiffTimeOnTracks.SetDirectory(0)
289 hClusterSizeVsTimeResidual.SetDirectory(0)
290
291 histos['onTracks'][name_side] = hClsTimeOnTracks
292 histos_all[name_side] = hClsTimeAll
293 histos['diff'][name_side] = hClsDiffTimeOnTracks
294 histos['timeShifter'][name_side] = hClusterSizeVsTimeResidual
295 # __hAbsoluteShiftValues__.SetDirectory(0)
296 #  for the absolute shift we are iterating on pair layers/side, not individual sensors
297 for layer in range(3, 7):
298 for side in [True, False]:
299 layder_side = f'L{layer}S{"U" if side else "V"}'
300 histos["absoluteShift"][layder_side] = __hAbsoluteShiftValues__.GetBinContent(
301 __hAbsoluteShiftValues__.GetXaxis().FindBin(layder_side))
302
303 histos['offTracks'] = {key: get_histo_offTracks(histos_all[key], histos['onTracks'][key])
304 for key in histos['onTracks']}
305
306 # replace None with empty histograms
307 for kind in ['onTracks', 'offTracks', 'diff']:
308 for key, value in histos[kind].items():
309 if not isinstance(value, r.TH1):
310 histos[kind][key] = r.TH1F(f'{kind}_{key}', f'{kind}_{key}', 300, -150, 150)
311
312 return histos
313
314
315def get_merged_collector_histograms(files):
316
317 CollectorHistograms = {}
318
319 num_files = len(files)
320 print(f'Looping over {num_files} files')
321 progress(0, num_files)
322 for count, in_file_name in enumerate(files):
323
324 in_file = r.TFile(str(in_file_name))
325
326 for algo in time_algorithms:
327
328 base_dir = f'SVDTimeValidationCollector_{algo}'
329 iov = in_file.Get(f'{base_dir}/RunRange').getIntervalOfValidity()
330 exp, run = iov.getExperimentLow(), iov.getRunLow()
331
332 if algo not in CollectorHistograms:
333 CollectorHistograms[algo] = {}
334 if exp not in CollectorHistograms[algo]:
335 CollectorHistograms[algo][exp] = {}
336 if run not in CollectorHistograms[algo][exp]:
337 CollectorHistograms[algo][exp][run] = {"hEventT0": [],
338 "hClsTimeOnTracks": [],
339 "hClsTimeAll": [],
340 "hClsDiffTimeOnTracks": [],
341 "hClusterSizeVsTimeResidual": [],
342 "hBinToSensorMap": [],
343 "hEventT0FromCDC": [],
344 "hEventT0FromSVD": [],
345 "hAbsoluteShiftValues": []
346 }
347
348 __hEventT0__ = in_file.Get(get_full_path('hEventT0', exp, run, base_dir))
349 __hEventT0FromCDC__ = in_file.Get(get_full_path('hEventT0FromCDC', exp, run, base_dir))
350 __hEventT0FromSVD__ = in_file.Get(get_full_path('hEventT0FromSVD', exp, run, base_dir))
351 __hClsTimeOnTracks__ = in_file.Get(get_full_path('__hClsTimeOnTracks__',
352 exp, run, base_dir))
353 __hClsTimeAll__ = in_file.Get(get_full_path('__hClsTimeAll__',
354 exp, run, base_dir))
355 __hClsDiffTimeOnTracks__ = in_file.Get(get_full_path('__hClsDiffTimeOnTracks__',
356 exp, run, base_dir))
357 __hClusterSizeVsTimeResidual__ = in_file.Get(get_full_path('__hClusterSizeVsTimeResidual__',
358 exp, run, base_dir))
359 __hBinToSensorMap__ = in_file.Get(get_full_path('__hBinToSensorMap__',
360 exp, run, base_dir))
361 __hAbsoluteShiftValues__ = in_file.Get(get_full_path('hAbsoluteShiftValues',
362 exp, run, base_dir))
363
364 __hEventT0__.SetDirectory(0)
365 __hEventT0FromCDC__.SetDirectory(0)
366 __hEventT0FromSVD__.SetDirectory(0)
367 __hClsTimeOnTracks__.SetDirectory(0)
368 __hClsTimeAll__.SetDirectory(0)
369 __hClsDiffTimeOnTracks__.SetDirectory(0)
370 __hClusterSizeVsTimeResidual__.SetDirectory(0)
371 __hBinToSensorMap__.SetDirectory(0)
372 __hAbsoluteShiftValues__.SetDirectory(0)
373 CollectorHistograms[algo][exp][run]["hEventT0"].append(__hEventT0__)
374 CollectorHistograms[algo][exp][run]["hClsTimeOnTracks"].append(__hClsTimeOnTracks__)
375 CollectorHistograms[algo][exp][run]["hClsTimeAll"].append(__hClsTimeAll__)
376 CollectorHistograms[algo][exp][run]["hClsDiffTimeOnTracks"].append(__hClsDiffTimeOnTracks__)
377 CollectorHistograms[algo][exp][run]["hClusterSizeVsTimeResidual"].append(__hClusterSizeVsTimeResidual__)
378 CollectorHistograms[algo][exp][run]["hBinToSensorMap"].append(__hBinToSensorMap__)
379 CollectorHistograms[algo][exp][run]["hEventT0FromCDC"].append(__hEventT0FromCDC__)
380 CollectorHistograms[algo][exp][run]["hEventT0FromSVD"].append(__hEventT0FromSVD__)
381 # absolute shift values are the same for a given exp and run, so don't append.
382 # just overwrite: if not already present it is setting the histo,
383 # overwrting else. It avoids logic of checking wether or not it is already
384 # here
385 CollectorHistograms[algo][exp][run]["hAbsoluteShiftValues"] = [__hAbsoluteShiftValues__]
386
387 in_file.Close()
388
389 # Show the progress
390 progress(count+1, num_files)
391
392 print()
393
394 for algo in CollectorHistograms:
395 for exp in CollectorHistograms[algo]:
396 for run in CollectorHistograms[algo][exp]:
397 for key in CollectorHistograms[algo][exp][run]:
398 for hist in CollectorHistograms[algo][exp][run][key][1:]:
399 CollectorHistograms[algo][exp][run][key][0].Add(hist)
400 CollectorHistograms[algo][exp][run][key] = CollectorHistograms[algo][exp][run][key][0]
401
402 return CollectorHistograms