Belle II Software  release-05-02-19
validation_utils.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import fnmatch
5 import math
6 
7 import numpy as np
8 import matplotlib
9 import matplotlib.pyplot as plt
10 
11 import ROOT as r
12 r.PyConfig.IgnoreCommandLineOptions = True
13 
14 matplotlib.use('Agg')
15 plt.style.use("belle2")
16 
17 
18 def get_full_path(name, exp, run, base_dir):
19  return f'{base_dir}/{name}/{name}_{exp}.{run}/{name}_1'
20 
21 
22 def get_combined(histos, pattern):
23  '''
24  Sum histograms that match a certain pattern shell-style, e.g.
25  "*" all, "L3*" all layer 3, "*U" all U-sensors
26 
27  Parameters:
28  histos (dict{label : TH1}): dictionary of
29  patern (str): shell-styel matching pattern
30  '''
31 
32  good_keys = [key for key in histos.keys() if fnmatch.fnmatch(key, pattern)]
33 
34  assert len(good_keys) > 0
35 
36  h = histos[good_keys[0]].Clone()
37  h.SetName(h.GetName().split('__')[0]+f'__{pattern}')
38  h.SetTitle(h.GetName().split('__')[0]+f' in {pattern}')
39 
40  for key in good_keys[1:]:
41  h.Add(histos[key])
42 
43  return h
44 
45 
46 def get_histo_offTracks(histo_all, histo_onTracks):
47  '''
48  Substract the histogram with the clusters on tracks from the histogram with all the clusters
49  to get the histogram with the clusters off tracks
50  '''
51  try:
52  histo_offTracks = histo_all.Clone(histo_all.GetName().replace('all', 'offTracks'))
53  histo_offTracks.SetTitle(histo_offTracks.GetTitle().replace('All', 'OffTracks'))
54  histo_offTracks.Add(histo_onTracks, -1)
55  return histo_offTracks
56  except (ReferenceError, TypeError):
57  return None
58 
59 
60 def get_agreament(histo_eventT0, histo_diff, min_entries=100):
61  '''
62  Get the mean of the difference between the mean time of the clusters on tracks and
63  the mean eventT0 divided by the RMS of the event T0
64  '''
65  if histo_eventT0.GetEntries() > min_entries and histo_diff.GetEntries() > min_entries:
66  return histo_diff.GetMean()/histo_eventT0.GetRMS()
67  else:
68  return np.nan
69 
70 
71 def get_precision(histo_diff, min_entries=100):
72  '''
73  Get the RMS of the difference between the mean time of the clusters on tracks
74  '''
75  if histo_diff.GetEntries() > min_entries:
76  return histo_diff.GetRMS()
77  else:
78  return np.nan
79 
80 
81 def get_agreament2(histo_eventT0, histo_onTracks, min_entries=100):
82  '''
83  Get the difference between the mean time of the clusters on tracks and the mean eventT0 divided by the RMS of the event T0
84  '''
85  # Get the difference in number of sigmas between the mean eventT0 and the mean time of the clusters on tracks
86  if histo_eventT0.GetEntries() > min_entries and histo_onTracks.GetEntries() > min_entries:
87  return (histo_onTracks.GetMean() - histo_eventT0.GetMean())/histo_eventT0.GetRMS()
88  else:
89  return np.nan
90 
91 
92 def make_roc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True):
93  import root_numpy
94  dist_sgn = root_numpy.hist2array(hist_sgn)
95  dist_bkg = root_numpy.hist2array(hist_bkg)
96  dist_sgn = dist_sgn/dist_sgn.sum()
97  dist_bkg = dist_bkg/dist_bkg.sum()
98  if two_sided:
99  dist_sgn = np.append(dist_sgn, [0])
100  dist_bkg = np.append(dist_bkg, [0])
101  eff_sgn = tuple(reversed([sum(dist_sgn[i:-(1+i)]) for i in range(math.ceil(len(dist_sgn)/2)+1)]))
102  eff_bkg = tuple(reversed([sum(dist_bkg[i:-(1+i)]) for i in range(math.ceil(len(dist_bkg)/2)+1)]))
103  else:
104  if not lower_is_better:
105  dist_sgn = np.array(tuple(reversed(dist_sgn)))
106  dist_bkg = np.array(tuple(reversed(dist_bkg)))
107  eff_sgn = [sum(dist_sgn[:i+1]) for i in range(len(dist_sgn))]
108  eff_bkg = [sum(dist_bkg[:i+1]) for i in range(len(dist_bkg))]
109  return eff_sgn, [1-i for i in eff_bkg]
110 
111 
112 def get_roc_auc(hist_sgn, hist_bkg, lower_is_better=False, two_sided=True, min_entries=100):
113  if hist_sgn.GetEntries() > min_entries and hist_bkg.GetEntries() > min_entries:
114 
115  return np.trapz(*reversed(make_roc(
116  hist_sgn, hist_bkg,
117  lower_is_better=lower_is_better,
118  two_sided=two_sided)))
119  else:
120  return np.nan
121 
122 
123 def np2plt_hist(np_hist):
124  return {'x': np_hist[1][0][:-1], 'bins': np_hist[1][0], 'weights': np_hist[0]}
125 
126 
127 def make_combined_plot(pattern, histos, title=None):
128  import root_numpy
129  h_onTracks = root_numpy.hist2array(get_combined(histos['onTracks'], pattern), return_edges=True)
130  h_offTracks = root_numpy.hist2array(get_combined(histos['offTracks'], pattern), return_edges=True)
131  h_eventT0 = root_numpy.hist2array(histos['eventT0'], return_edges=True)
132 
133  # normalise h_eventT0 to have same number of entries as h_onTracks
134  h_eventT0 = (h_eventT0[0]*sum(h_onTracks[0])/sum(h_eventT0[0]), h_eventT0[1])
135 
136  plt.figure()
137  plt.hist(h_onTracks[1][0][:-1], h_onTracks[1][0], weights=h_onTracks[0], histtype='step', label='on tracks')
138  plt.hist(h_offTracks[1][0][:-1], h_offTracks[1][0], weights=h_offTracks[0], histtype='step', label='off tracks')
139  plt.hist(h_eventT0[1][0][:-1], h_eventT0[1][0], weights=h_eventT0[0], histtype='step', label='event T0')
140  plt.legend(loc='upper right')
141  plt.xlabel('cluster time [ns]')
142  if title is not None:
143  plt.title(title)
144  plt.tight_layout()
145 
146 
147 # get labels for all possible sides
148 names_sides = []
149 ladder_of_layer = [7, 10, 12, 16]
150 sensor_on_layer = [2, 3, 4, 5]
151 for layer in range(3, 7):
152  for ladder in range(1, ladder_of_layer[layer-3]+1):
153  for sensor in range(1, sensor_on_layer[layer-3]+1):
154  for side in ['U', 'V']:
155  names_sides.append(f'L{layer}L{ladder}S{sensor}{side}')
156 
157 
158 # grouped sensors integrating phi
159 names_grouped_sides = []
160 for layer in range(3, 7):
161  for sensor in range(1, sensor_on_layer[layer-3]+1):
162  for side in ['U', 'V']:
163  names_grouped_sides.append(f'L{layer}L*S{sensor}{side}')
164 
165 
166 time_algorithms = ['CoG6', 'CoG3', 'ELS3']
167 
168 
169 def get_histos(in_file, algo='CoG3',
170  names_sides=names_sides,
171  base_dirs={algo: f'SVDTimeValidationCollector_{algo}'
172  for algo in time_algorithms}):
173 
174  histos = {}
175 
176  base_dir = base_dirs[algo]
177  iov = in_file.Get(f'{base_dir}/RunRange').getIntervalOfValidity()
178  exp, run = iov.getExperimentLow(), iov.getRunLow()
179 
180  histos['eventT0'] = in_file.Get(get_full_path(f'hEventT0', exp, run, base_dir))
181 
182  histos_all = {name_side: in_file.Get(get_full_path(f'clsTimeAll__{name_side}', exp, run, base_dir))
183  for name_side in names_sides}
184  histos['onTracks'] = {name_side: in_file.Get(get_full_path(f'clsTimeOnTracks__{name_side}', exp, run, base_dir))
185  for name_side in names_sides}
186  histos['offTracks'] = {key: get_histo_offTracks(histos_all[key], histos['onTracks'][key])
187  for key in histos['onTracks']}
188  histos['diff'] = {name_side: in_file.Get(get_full_path(f'clsDiffTimeOnTracks__{name_side}', exp, run, base_dir))
189  for name_side in names_sides}
190 
191  # replace None with empty histograms
192  for kind in ['onTracks', 'offTracks', 'diff']:
193  for key, value in histos[kind].items():
194  if not isinstance(value, r.TH1):
195  histos[kind][key] = r.TH1F(f'{kind}_{key}_{algo}', f'{kind}_{key}_{algo}', 300, -150, 150)
196 
197  return histos, exp, run