Belle II Software development
resolution.py
1#!/usr/bin/env python3
2
3
10
11from tracking.validation.plot import ValidationPlot, compose_axis_label, get_unit
12
13# root_save_name supplies the fallback plot-name prefix derived from the quantity name
14from tracking.root_utils import root_save_name
15
16import collections
17
18import numpy as np
19
20from tracking.validation.tolerate_missing_key_formatter import TolerateMissingKeyFormatter
21
22formatter = TolerateMissingKeyFormatter()
23
24
26 """Perform resolution analysis"""
27
28
#: default Z-score (for outlier detection)
default_outlier_z_score = 5.0

#: default minimum number of entries
default_min_required_entries = 50

#: default template for plot names; the placeholders are filled in by
#: ``formatter.format`` when the plots are created
default_plot_name = "{plot_name_prefix}_{subplot_name}{plot_name_postfix}"

#: default template for plot titles; the placeholders are filled in by
#: ``formatter.format`` when the plots are created
default_plot_title = "{subplot_title} of {quantity_name}{plot_title_postfix}"

#: default list of plots to create
default_which_plots = [
    "resolution",
]

#: by default, create expert plots
default_is_expert = True
43
def __init__(
    self,
    quantity_name,
    bin_spacing,
    bin_name,
    bin_unit=None,
    unit=None,
    outlier_z_score=None,
    contact='',
    plot_name=None,
    plot_title=None,
    min_required_entries=None,
    plot_name_prefix='',
    plot_name_postfix='',
    plot_title_postfix='',
    referenceFileName=None,
):
    """Set up a comparison of an estimated quantity to its truth in bins.

    Parameters
    ----------
    quantity_name : str
        Name of the quantity whose resolution is analysed.
    bin_spacing : sequence of float
        Bin edges; e.g. ``[0, 0.5, 1.0]`` yields two bins.
    bin_name : str
        Name of the binning variable (x axis label of the resolution graph).
    bin_unit : str, optional
        Unit of the binning variable.
    unit : str, optional
        Unit of the quantity; looked up with ``get_unit`` when not given.
    outlier_z_score : float, optional
        Z-score for outlier detection; defaults to
        ``default_outlier_z_score``.
    contact : str, optional
        Contact person stored for the plots.
    plot_name : str, optional
        Name template of the plots; ``default_plot_name`` is used when None.
    plot_title : str, optional
        Title template of the plots; ``default_plot_title`` is used when
        None.
    min_required_entries : int, optional
        Minimum number of entries in a bin for the resolution fit; defaults
        to ``default_min_required_entries``.
    plot_name_prefix : str, optional
        Deprecated, use ``plot_name`` instead.
    plot_name_postfix : str, optional
        Deprecated, use ``plot_name`` instead.
    plot_title_postfix : str, optional
        Deprecated, use ``plot_title`` instead.
    referenceFileName : str, optional
        If set, binnings of histograms will be read from the corresponding
        histogram in this file.
    """
    #: cached name of the quantity being analysed
    self.quantity_name = quantity_name
    #: cached measurement unit of the quantity
    self.unit = unit or get_unit(quantity_name)
    #: cached value of the histogram bin spacing
    self.bin_spacing = bin_spacing
    #: cached name of the binning variable
    self.bin_name = bin_name
    #: cached value of the bin measurement unit
    self.bin_unit = bin_unit

    # NOTE(review): the two fall-backs below had no body in the reviewed
    # extract; they are restored by analogy with the ``default_*`` class
    # attributes - confirm against the repository.
    #: cached value of the Z-score (for outlier detection)
    self.outlier_z_score = (
        self.default_outlier_z_score if outlier_z_score is None else outlier_z_score
    )
    #: cached value of the minimum number of entries
    self.min_required_entries = (
        self.default_min_required_entries
        if min_required_entries is None
        else min_required_entries
    )

    #: cached value of the base name of the plot
    self.plot_name = plot_name
    #: cached value of the plot title
    self.plot_title = plot_title

    #: cached value of the prefix prepended to the plot name
    self.plot_name_prefix = plot_name_prefix or root_save_name(quantity_name)
    #: cached value of the suffix appended to the plot name
    self.plot_name_postfix = plot_name_postfix
    #: cached value of the suffix appended to the plot title
    self.plot_title_postfix = plot_title_postfix

    #: cached value of the contact person
    self._contact = contact
    #: cached value of the dictionary of plots to be created
    self.plots = collections.OrderedDict()

    #: cached value of the reference filename
    self.referenceFileName = referenceFileName
104
# NOTE(review): the ``def`` line is absent from the reviewed extract; the name
# ``analyse`` is reconstructed - confirm against the repository before merging.
def analyse(
    self,
    bin_values,
    truths,
    estimates,
    which_plots=None,
    is_expert=None
):
    """Compare the estimates to the truths and generate resolution plots.

    For every pair of neighbouring edges in ``self.bin_spacing`` the
    residuals (estimate - truth) of all entries whose bin value lies in
    ``[lower, upper)`` are histogrammed.  If the histogram holds at least
    ``self.min_required_entries`` entries it is fitted with ``fit_gaus`` and
    the fitted width is recorded.  The recorded widths are finally drawn
    against the bin centres.  All plots are stored in ``self.plots``.

    Parameters
    ----------
    bin_values : array_like(float)
        The parameter used for binning
    truths : array_like(float)
        Sample of the true values
    estimates : array_like(float)
        Corresponding estimations
    which_plots : list(str), optional
        Names of the plots to create; ``default_which_plots`` when None
    is_expert : bool, optional
        Forwarded to the plotting calls; ``default_is_expert`` when None

    Raises
    ------
    ValueError
        If two neighbouring bin edges are not strictly increasing, or if
        ``bin_values`` and the residuals differ in length.
    """
    if is_expert is None:
        is_expert = self.default_is_expert

    if which_plots is None:
        which_plots = self.default_which_plots

    quantity_name = self.quantity_name
    outlier_z_score = self.outlier_z_score

    plot_name = self.plot_name
    if plot_name is None:
        plot_name = self.default_plot_name

    plot_name = formatter.format(plot_name,
                                 quantity_name=quantity_name,
                                 plot_name_prefix=self.plot_name_prefix,
                                 plot_name_postfix=self.plot_name_postfix)

    plot_title = self.plot_title
    if plot_title is None:
        plot_title = self.default_plot_title

    plot_title = formatter.format(plot_title,
                                  quantity_name=quantity_name,
                                  plot_title_postfix=self.plot_title_postfix)

    # compute residuals
    residuals = estimates - truths

    if "resolution" not in which_plots:
        return

    # Validate once, with a real exception: ``assert`` disappears under -O and
    # the pairing below would otherwise silently truncate to the shorter input.
    if len(bin_values) != len(residuals):
        raise ValueError(
            f"bin_values and residuals differ in length: "
            f"{len(bin_values)} != {len(residuals)}"
        )

    # (bin centre, fitted sigma, error on sigma) for every bin that was fitted
    fitted_bins = []

    for lower_bin, upper_bin in zip(self.bin_spacing[:-1], self.bin_spacing[1:]):
        if not lower_bin < upper_bin:
            raise ValueError(
                f"bin edges must be strictly increasing, got {lower_bin} >= {upper_bin}"
            )
        bin_center = lower_bin + (upper_bin - lower_bin) / 2.0

        # residuals of the entries falling into the half-open bin [lower, upper)
        sel_residuals = collections.deque(
            residual
            for bin_value, residual in zip(bin_values, residuals)
            if lower_bin <= bin_value < upper_bin
        )

        residuals_hist_name = formatter.format(plot_name, subplot_name="residuals") + \
            f"{lower_bin}_to_{upper_bin}"
        vplot = ValidationPlot(residuals_hist_name, self.referenceFileName)
        vplot.hist(sel_residuals,
                   outlier_z_score=outlier_z_score,
                   is_expert=is_expert)
        vplot.xlabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
        vplot.title = formatter.format(plot_title, subplot_title='Residual distribution')

        self.plots['residuals' + residuals_hist_name] = vplot

        # only fit when the histogram holds the minimum number of entries
        if vplot.histograms[0].GetEntries() >= self.min_required_entries:
            fit_res = vplot.fit_gaus(z_score=1)

            # extract the fit result from ROOT's TFitResult; index 2 is taken
            # as the Gaussian width (index 1 would be the mean)
            gaus_sigma = fit_res.GetParams()[2]
            gaus_sigma_err = fit_res.Errors()[2]
            fitted_bins.append((bin_center, gaus_sigma, gaus_sigma_err))

    resolution_graph_name = formatter.format(plot_name, subplot_name="resolution")
    resolution_graph = ValidationPlot(resolution_graph_name, self.referenceFileName)

    # Compile the data going into the final graph.  Every point gets its own
    # error entries so that all four arrays have the same length; a fitted
    # sigma of exactly 0.0 is kept, only unfitted bins are left out.
    xs = np.array([center for center, _, _ in fitted_bins])
    xs_err = np.array([0.0 for _ in fitted_bins])
    ys = np.array([sigma for _, sigma, _ in fitted_bins])
    ys_err = np.array([sigma_err for _, _, sigma_err in fitted_bins])

    resolution_graph.grapherrors((xs, xs_err), (ys, ys_err),
                                 is_expert=is_expert)
    resolution_graph.xlabel = compose_axis_label(self.bin_name, self.bin_unit)
    resolution_graph.ylabel = compose_axis_label(self.quantity_name, self.unit)
    resolution_graph.title = formatter.format(plot_title, subplot_title='Resolution')

    self.plots[resolution_graph_name] = resolution_graph
236
237
239
@property
def contact(self):
    """Return the contact person currently stored for this analysis."""
    return self._contact

@contact.setter
def contact(self, contact):
    """Store the contact person and pass it on to every plot held in ``self.plots``."""
    self._contact = contact
    for plot in self.plots.values():
        plot.contact = contact
251
def write(self, tDirectory=None):
    """Write every validation plot held in ``self.plots`` to the given ROOT directory.

    ``tDirectory`` is handed unchanged to the ``write`` method of each plot.
    """
    for plot in self.plots.values():
        plot.write(tDirectory)
int default_min_required_entries
default minimum number of entries
Definition: resolution.py:31
quantity_name
cached name of the quantity in the resolution analysis
Definition: resolution.py:64
contact
Forward the contact to all plots by reassigning the contact.
Definition: resolution.py:238
plot_title
cached value of the plot title
Definition: resolution.py:88
outlier_z_score
cached value of the Z-score (for outlier detection)
Definition: resolution.py:76
plots
cached value of the dictionary of plots to be created
Definition: resolution.py:100
float default_outlier_z_score
default Z-score (for outlier detection)
Definition: resolution.py:29
unit
cached measurement unit for this resolution analysis
Definition: resolution.py:66
_contact
cached value of the contact person
Definition: resolution.py:98
plot_name_postfix
cached value of the suffix appended to the plot name
Definition: resolution.py:93
plot_name
cached value of the base name of the plot
Definition: resolution.py:86
bin_unit
cached value of the bin measurement unit
Definition: resolution.py:72
plot_title_postfix
cached value of the suffix appended to the plot title
Definition: resolution.py:95
min_required_entries
cached value of the minimum number of entries
Definition: resolution.py:81
bin_spacing
cached value of the histogram bin spacing
Definition: resolution.py:68
plot_name_prefix
cached value of the prefix prepended to the plot name
Definition: resolution.py:91
bool default_is_expert
by default, create expert plots
Definition: resolution.py:42
list default_which_plots
default list of plots to create
Definition: resolution.py:37
referenceFileName
cached value of the reference filename
Definition: resolution.py:103
def __init__(self, quantity_name, bin_spacing, bin_name, bin_unit=None, unit=None, outlier_z_score=None, contact='', plot_name=None, plot_title=None, min_required_entries=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None)
Definition: resolution.py:60