Belle II Software development
pull.py
1#!/usr/bin/env python3
2
3
10
11from tracking.validation.plot import ValidationPlot, compose_axis_label, get_unit
12
13# get error function as a np.ufunc vectorised for numpy array
15from tracking.root_utils import root_save_name
16
17import collections
18
19import numpy as np
20
21from tracking.validation.tolerate_missing_key_formatter import TolerateMissingKeyFormatter
22
23formatter = TolerateMissingKeyFormatter()
24
25
27 """Performs a comparison of an estimated quantity to their truths by generating standardized validation plots."""
28
29 default_outlier_z_score = 5.0
30
31 default_plot_name = "{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
32
33 default_plot_title = "{subplot_title} of {quantity_name}{plot_title_postfix}"
34
35 default_which_plots = [
36 "truths",
37 "estimates",
38 "diag_profile",
39 "diag_scatter",
40 "residuals",
41 "sigmas",
42 "pulls",
43 "p_values",
44 "aux_residual_hist2d",
45 "aux_residual_profile",
46 "aux_pull_hist2d",
47 "aux_pull_profile",
48 ]
49
50
51 default_is_expert = True
52
54 self,
55 quantity_name,
56 unit=None,
57 outlier_z_score=None,
58 absolute=False,
59 contact='',
60 plot_name=None,
61 plot_title=None,
62 plot_name_prefix='', # deprecated use plot_name instead
63 plot_name_postfix='', # deprecated use plot_name instead
64 plot_title_postfix='', # deprecated use plot_title instead
65 referenceFileName=None # if set binnings of plots will be read from corresponding histograms
66 ):
67 """Performs a comparison of an estimated quantity to their truths by generating standardized validation plots."""
68
69
70 self.quantity_name = quantity_name
71
72 self.unit = unit or get_unit(quantity_name)
73
74
75 if outlier_z_score is None:
77 else:
78 self.outlier_z_score = outlier_z_score
79
80
81 self.absolute = absolute
82
83
84 self.plot_name = plot_name
85
86 self.plot_title = plot_title
87
88
89 self.plot_name_prefix = plot_name_prefix or root_save_name(quantity_name)
90
91 self.plot_name_postfix = plot_name_postfix
92
93 self.plot_title_postfix = plot_title_postfix
94
95
96 self._contact = contact
97
98 self.plots = collections.OrderedDict()
99
100
101 self.referenceFileName = referenceFileName
102
104 self,
105 truths,
106 estimates,
107 variances=None,
108 auxiliaries={},
109 which_plots=None,
110 is_expert=None
111 ):
112 """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
113 Close indicates if the figure shall be closed after they are saved.
114
115 Parameters
116 ----------
117 truths : array_like(float)
118 Sample of the true values
119 estimates : array_like(float)
120 Corresponding estimations
121 variances : array_like(float), optional
122 Corresponding variance estimations
123 auxiliaries : Dict[name, array_like(float)], optional
124 Auxiliary variable to show distribution of residuals and pull as function
125 selected_plots : list(str), optional
126 List of analysis plots to be generated. All if not given.
127 Currently valid names are
128 truths, estimates, diag_profile, diag_scatter, residuals,
129 sigmas, pulls, p_values
130 """
131
132 if is_expert is None:
133 is_expert = self.default_is_expert
134
135 if which_plots is None:
136 which_plots = self.default_which_plots
137
138 quantity_name = self.quantity_name
139
140 axis_label = compose_axis_label(quantity_name, self.unit)
141
142 plot_name_prefix = self.plot_name_prefix
143 outlier_z_score = self.outlier_z_score
144
145 absolute = self.absolute
146 # Compare only the absolute value by taking the absolute of the curvature truth
147 # and flip the sign of the estimate
148 if absolute:
149 absolute_truths = truths.copy()
150 absolute_estimates = estimates.copy()
151
152 flip_sign_for = truths < 0
153 absolute_truths[flip_sign_for] = -truths[flip_sign_for]
154 absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
155
156 truths = absolute_truths
157 estimates = absolute_estimates
158
159 quantity_name = 'absolute ' + quantity_name
160
161 residuals = estimates - truths
162
163 if variances is not None:
164 sigmas = np.sqrt(variances)
165 pulls = np.divide(residuals, sigmas)
166 p_values = 1.0 - erf(np.abs(pulls))
167
168 plot_name = self.plot_name
169 if plot_name is None:
170 plot_name = self.default_plot_name
171
172 plot_name = formatter.format(plot_name,
173 quantity_name=quantity_name,
174 plot_name_prefix=plot_name_prefix,
175 plot_name_postfix=self.plot_name_postfix)
176
177 plot_title = self.plot_title
178 if plot_title is None:
179 plot_title = self.default_plot_title
180
181 plot_title = formatter.format(plot_title,
182 quantity_name=quantity_name,
183 plot_title_postfix=self.plot_title_postfix)
184
185 # Truths #
186
187
188 if "truths" in which_plots:
189 # Distribution of truths
190 truths_hist_name = formatter.format(plot_name, subplot_name="truths")
191 truths_hist = ValidationPlot(truths_hist_name, self.referenceFileName)
192 truths_hist.hist(truths,
193 outlier_z_score=outlier_z_score,
194 is_expert=is_expert)
195 truths_hist.xlabel = axis_label
196 truths_hist.title = formatter.format(plot_title, subplot_title='True distribution')
197
198 self.plots['truths'] = truths_hist
199
200 # Estimates #
201
202
203 if "estimates" in which_plots:
204 # Distribution of estimates
205 estimates_hist_name = formatter.format(plot_name, subplot_name="estimates")
206 estimates_hist = ValidationPlot(estimates_hist_name, self.referenceFileName)
207 estimates_hist.hist(estimates,
208 outlier_z_score=outlier_z_score,
209 is_expert=is_expert)
210 estimates_hist.xlabel = axis_label
211 estimates_hist.title = formatter.format(plot_title, subplot_title='Estimates distribution')
212
213 self.plots['estimates'] = estimates_hist
214
215 # Diagonal plots #
216
217 if "diag_scatter" in which_plots:
218 # Estimates versus truths scatter plot
219 estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name="diag_scatter")
220 estimates_by_truths_scatter = ValidationPlot(estimates_by_truths_scatter_name, self.referenceFileName)
221 estimates_by_truths_scatter.scatter(truths,
222 estimates,
223 outlier_z_score=outlier_z_score,
224 is_expert=is_expert)
225 estimates_by_truths_scatter.xlabel = 'True ' + axis_label
226 estimates_by_truths_scatter.ylabel = 'Estimated ' + axis_label
227 estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title='Diagonal scatter plot')
228
229 self.plots['diag_scatter'] = estimates_by_truths_scatter
230
231 if "diag_profile" in which_plots:
232 # Estimates versus truths profile plot
233 estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name="diag_profile")
234 estimates_by_truths_profile = ValidationPlot(estimates_by_truths_profile_name, self.referenceFileName)
235
236 # Fill residuals and correct afterwards
237 estimates_by_truths_profile.profile(truths,
238 estimates - truths,
239 outlier_z_score=outlier_z_score,
240 gaus_z_score=4,
241 is_expert=is_expert)
242
243 # Correct with TF1 - only works because the Gaussian fit is active.
244 hist = estimates_by_truths_profile.histograms[0]
245 GetBinContent = hist.GetBinContent
246 GetBinCenter = hist.GetBinCenter
247 SetBinContent = hist.SetBinContent
248 for i_bin in range(hist.GetNbinsX() + 2):
249 residual = GetBinContent(i_bin)
250 truth = GetBinCenter(i_bin)
251 if residual != 0:
252 SetBinContent(i_bin, residual + truth)
253
254 # Reset maximum and minimum
255 estimates_by_truths_profile.histograms[0].SetMaximum()
256 estimates_by_truths_profile.histograms[0].SetMinimum()
257
258 estimates_by_truths_profile.xlabel = 'True ' + axis_label
259 estimates_by_truths_profile.ylabel = 'Estimated ' + axis_label
260
261 estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title='Diagonal profile')
262 estimates_by_truths_profile.fit_diag()
263
264 self.plots['diag_profile'] = estimates_by_truths_profile
265
266 # Residuals #
267
268 if "residuals" in which_plots:
269 # Distribution of the residuals
270 residuals_hist_name = formatter.format(plot_name, subplot_name="residuals")
271 residuals_hist = ValidationPlot(residuals_hist_name, self.referenceFileName)
272 residuals_hist.hist(residuals,
273 outlier_z_score=outlier_z_score,
274 is_expert=is_expert)
275 residuals_hist.xlabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
276 residuals_hist.title = formatter.format(plot_title, subplot_title='Residual distribution')
277
278 self.plots['residuals'] = residuals_hist
279
280 # Variances #
281
282 if variances is not None and "sigmas" in which_plots:
283
284 # Distribution of sigmas
285 sigmas_hist_name = formatter.format(plot_name, subplot_name="sigmas")
286 sigmas_hist = ValidationPlot(sigmas_hist_name, self.referenceFileName)
287 sigmas_hist.hist(sigmas,
288 lower_bound=0,
289 outlier_z_score=outlier_z_score,
290 is_expert=is_expert)
291 sigmas_hist.xlabel = compose_axis_label("#sigma (" + quantity_name + ')', self.unit)
292 sigmas_hist.title = formatter.format(plot_title, subplot_title='Estimated variance distribution')
293
294 self.plots['sigmas'] = sigmas_hist
295
296 # Pulls #
297
298 if variances is not None and "pulls" in which_plots:
299
300 # Distribution of pulls
301 pulls_hist_name = formatter.format(plot_name, subplot_name="pulls")
302 pulls_hist = ValidationPlot(pulls_hist_name, self.referenceFileName)
303 pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
304 pulls_hist.xlabel = "pull (" + quantity_name + ")"
305 pulls_hist.title = formatter.format(plot_title, subplot_title='Pull distribution')
306 pulls_hist.fit_gaus(z_score=1)
307
308 self.plots['pulls'] = pulls_hist
309
310 # P-Values #
311
312 if variances is not None and "p_values" in which_plots:
313
314 # Distribution of p_values
315 p_values_hist_name = formatter.format(plot_name, subplot_name="p-values")
316 p_values_hist = ValidationPlot(p_values_hist_name, self.referenceFileName)
317 p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
318 p_values_hist.xlabel = "p-value (" + quantity_name + ")"
319 p_values_hist.title = formatter.format(plot_title, subplot_title='P-value distribution')
320 p_values_hist.fit_const()
321
322 self.plots['p_values'] = p_values_hist
323
324 # Auxialliary variables #
325 # ##################### #
326 for aux_name, aux_values in auxiliaries.items():
327 if "aux_residual_hist2d" in which_plots or "aux" in which_plots:
328 # Distribution of the residuals over auxiliary variable
329 aux_residuals_hist2d_name = formatter.format(plot_name,
330 subplot_name=f"residuals over {aux_name}")
331 aux_residuals_hist2d = ValidationPlot(aux_residuals_hist2d_name, self.referenceFileName)
332 aux_residuals_hist2d.hist2d(aux_values,
333 residuals,
334 outlier_z_score=outlier_z_score,
335 allow_discrete=True,
336 is_expert=is_expert)
337 aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
338 aux_residuals_hist2d.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
339 aux_residuals_hist2d.title = formatter.format(plot_title,
340 subplot_title=f'Residual distribution over {aux_name}')
341
342 self.plots['aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
343
344 if "aux_residual_profile" in which_plots or "aux" in which_plots:
345 # Distribution of the residuals over auxiliary variable
346 aux_residuals_profile_name = formatter.format(plot_name,
347 subplot_name=f"residuals profile over {aux_name}")
348 aux_residuals_profile = ValidationPlot(aux_residuals_profile_name, self.referenceFileName)
349 aux_residuals_profile.profile(aux_values,
350 residuals,
351 outlier_z_score=outlier_z_score,
352 gaus_z_score=1.5,
353 allow_discrete=True,
354 is_expert=is_expert,
355 )
356 aux_residuals_profile.xlabel = compose_axis_label(aux_name)
357 aux_residuals_profile.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
358 aux_residuals_profile.title = formatter.format(plot_title,
359 subplot_title=f'Residual profile over {aux_name}')
360
361 self.plots['aux_residuals_profile_' + aux_name] = aux_residuals_profile
362
363 if variances is not None and ("aux_pull_hist2d" in which_plots or "aux" in which_plots):
364 # Distribution of the pulls over auxiliary variable
365 aux_pulls_hist2d_name = formatter.format(plot_name,
366 subplot_name=f"pulls over {aux_name}")
367 aux_pulls_hist2d = ValidationPlot(aux_pulls_hist2d_name, self.referenceFileName)
368 aux_pulls_hist2d.hist2d(aux_values,
369 pulls,
370 outlier_z_score=outlier_z_score,
371 allow_discrete=True,
372 is_expert=is_expert)
373 aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
374 aux_pulls_hist2d.ylabel = "pull (" + quantity_name + ")"
375 aux_pulls_hist2d.title = formatter.format(plot_title,
376 subplot_title=f'Pull scatter over {aux_name}')
377
378 self.plots['aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
379
380 if variances is not None and ("aux_pull_profile" in which_plots or "aux" in which_plots):
381 # Distribution of the pulls over auxiliary variable
382 aux_pulls_profile_name = formatter.format(plot_name,
383 subplot_name=f"pull profile over {aux_name}")
384 aux_pulls_profile = ValidationPlot(aux_pulls_profile_name, self.referenceFileName)
385 aux_pulls_profile.profile(aux_values,
386 pulls,
387 outlier_z_score=outlier_z_score,
388 gaus_z_score=1.5,
389 allow_discrete=True,
390 is_expert=is_expert)
391 aux_pulls_profile.xlabel = compose_axis_label(aux_name)
392 aux_pulls_profile.ylabel = "pull (" + quantity_name + ")"
393 aux_pulls_profile.title = formatter.format(plot_title,
394 subplot_title=f'Pull profile over {aux_name}')
395
396 self.plots['aux_pulls_profile_' + aux_name] = aux_pulls_profile
397
398
400
401 @property
402 def contact(self):
403 """ returns the contact """
404 return self._contact
405
406 @contact.setter
407 def contact(self, contact):
408 """
409 sets the contact
410
411 parameters:
412 contact: new contact information
413 """
414 self._contact = contact
415 for validation_plot in list(self.plots.values()):
416 validation_plot.contact = contact
417
418 def write(self, tDirectory=None):
419 """ Write all validation plot to the given Root directory
420 parameters:
421 tDirectory - the root directory were to write to
422 """
423 for validation_plot in list(self.plots.values()):
424 validation_plot.write(tDirectory)
def __init__(self, quantity_name, unit=None, outlier_z_score=None, absolute=False, contact='', plot_name=None, plot_title=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None # if set binnings of plots will be read from corresponding histograms)
Definition: pull.py:66
quantity_name
name of the quantity the analysis is performed on
Definition: pull.py:70
contact
Forward the contract to all plots by reassigning the contact.
Definition: pull.py:399
plot_title
title of the plot
Definition: pull.py:86
outlier_z_score
the outlier score defines in terms of how many std deviations a data point is considered as an outlie...
Definition: pull.py:76
plots
dictionary to store the plots
Definition: pull.py:98
float default_outlier_z_score
default outlier z score
Definition: pull.py:29
unit
unit the quantity is given in
Definition: pull.py:72
def write(self, tDirectory=None)
Definition: pull.py:418
_contact
contact information
Definition: pull.py:96
str default_plot_title
default plot title
Definition: pull.py:33
plot_name_postfix
post fix to be append after the plot name
Definition: pull.py:91
def contact(self, contact)
Definition: pull.py:407
plot_title_postfix
postfix to be appended after the title
Definition: pull.py:93
plot_name_prefix
prefix to be prepended to the plot name
Definition: pull.py:89
bool default_is_expert
if true the plots created here are declared as expert plots in the validation
Definition: pull.py:51
list default_which_plots
default list of plots to be created in this analysis
Definition: pull.py:35
str default_plot_name
default plot name
Definition: pull.py:31
referenceFileName
name of the reference file, if set the binnings of plots will be read from the corresponding object i...
Definition: pull.py:101
absolute
if true only the absolute value is compared
Definition: pull.py:81