Belle II Software  release-08-01-10
pull.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 from tracking.validation.plot import ValidationPlot, compose_axis_label, get_unit
13 
14 # get error function as a np.ufunc vectorised for numpy array
15 from tracking.validation.utilities import erf
16 from tracking.root_utils import root_save_name
17 
18 import collections
19 
20 import numpy as np
21 
22 from tracking.validation.tolerate_missing_key_formatter import TolerateMissingKeyFormatter
23 
24 formatter = TolerateMissingKeyFormatter()
25 
26 
27 class PullAnalysis(object):
28  """Performs a comparision of an estimated quantity to their truths by generating standardized validation plots."""
29 
30  default_outlier_z_score = 5.0
31 
32  default_plot_name = "{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
33 
34  default_plot_title = "{subplot_title} of {quantity_name}{plot_title_postfix}"
35 
36  default_which_plots = [
37  "truths",
38  "estimates",
39  "diag_profile",
40  "diag_scatter",
41  "residuals",
42  "sigmas",
43  "pulls",
44  "p_values",
45  "aux_residual_hist2d",
46  "aux_residual_profile",
47  "aux_pull_hist2d",
48  "aux_pull_profile",
49  ]
50 
51 
52  default_is_expert = True
53 
54  def __init__(
55  self,
56  quantity_name,
57  unit=None,
58  outlier_z_score=None,
59  absolute=False,
60  contact='',
61  plot_name=None,
62  plot_title=None,
63  plot_name_prefix='', # depricated use plot_name instead
64  plot_name_postfix='', # depricated use plot_name instead
65  plot_title_postfix='', # depricated use plot_title instead
66  referenceFileName=None # if set binnings of plots will be read from corresponding histograms
67  ):
68  """Performs a comparision of an estimated quantity to their truths by generating standardized validation plots."""
69 
70 
71  self.quantity_namequantity_name = quantity_name
72 
73  self.unitunit = unit or get_unit(quantity_name)
74 
75 
76  if outlier_z_score is None:
77  self.outlier_z_scoreoutlier_z_score = self.default_outlier_z_scoredefault_outlier_z_score
78  else:
79  self.outlier_z_scoreoutlier_z_score = outlier_z_score
80 
81 
82  self.absoluteabsolute = absolute
83 
84 
85  self.plot_nameplot_name = plot_name
86 
87  self.plot_titleplot_title = plot_title
88 
89 
90  self.plot_name_prefixplot_name_prefix = plot_name_prefix or root_save_name(quantity_name)
91 
92  self.plot_name_postfixplot_name_postfix = plot_name_postfix
93 
94  self.plot_title_postfixplot_title_postfix = plot_title_postfix
95 
96 
97  self._contact_contact = contact
98 
99  self.plotsplots = collections.OrderedDict()
100 
101 
102  self.referenceFileNamereferenceFileName = referenceFileName
103 
104  def analyse(
105  self,
106  truths,
107  estimates,
108  variances=None,
109  auxiliaries={},
110  which_plots=None,
111  is_expert=None
112  ):
113  """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
114  Close indicates if the figure shall be closed after they are saved.
115 
116  Parameters
117  ----------
118  truths : array_like(float)
119  Sample of the true values
120  estimates : array_like(float)
121  Corresponding estimations
122  variances : array_like(float), optional
123  Corresponding variance estimations
124  auxiliaries : Dict[name, array_like(float)], optional
125  Auxiliary variable to show distribution of residuals and pull as function
126  selected_plots : list(str), optional
127  List of analysis plots to be generated. All if not given.
128  Currently valid names are
129  truths, estimates, diag_profile, diag_scatter, residuals,
130  sigmas, pulls, p_values
131  """
132 
133  if is_expert is None:
134  is_expert = self.default_is_expertdefault_is_expert
135 
136  if which_plots is None:
137  which_plots = self.default_which_plotsdefault_which_plots
138 
139  quantity_name = self.quantity_namequantity_name
140 
141  axis_label = compose_axis_label(quantity_name, self.unitunit)
142 
143  plot_name_prefix = self.plot_name_prefixplot_name_prefix
144  outlier_z_score = self.outlier_z_scoreoutlier_z_score
145 
146  absolute = self.absoluteabsolute
147  # Compare only the absolute value by taking the absolute of the curvature truth
148  # and flip the sign of the estimate
149  if absolute:
150  absolute_truths = truths.copy()
151  absolute_estimates = estimates.copy()
152 
153  flip_sign_for = truths < 0
154  absolute_truths[flip_sign_for] = -truths[flip_sign_for]
155  absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
156 
157  truths = absolute_truths
158  estimates = absolute_estimates
159 
160  quantity_name = 'absolute ' + quantity_name
161 
162  residuals = estimates - truths
163 
164  if variances is not None:
165  sigmas = np.sqrt(variances)
166  pulls = np.divide(residuals, sigmas)
167  p_values = 1.0 - erf(np.abs(pulls))
168 
169  plot_name = self.plot_nameplot_name
170  if plot_name is None:
171  plot_name = self.default_plot_namedefault_plot_name
172 
173  plot_name = formatter.format(plot_name,
174  quantity_name=quantity_name,
175  plot_name_prefix=plot_name_prefix,
176  plot_name_postfix=self.plot_name_postfixplot_name_postfix)
177 
178  plot_title = self.plot_titleplot_title
179  if plot_title is None:
180  plot_title = self.default_plot_titledefault_plot_title
181 
182  plot_title = formatter.format(plot_title,
183  quantity_name=quantity_name,
184  plot_title_postfix=self.plot_title_postfixplot_title_postfix)
185 
186  # Truths #
187 
188 
189  if "truths" in which_plots:
190  # Distribution of truths
191  truths_hist_name = formatter.format(plot_name, subplot_name="truths")
192  truths_hist = ValidationPlot(truths_hist_name, self.referenceFileNamereferenceFileName)
193  truths_hist.hist(truths,
194  outlier_z_score=outlier_z_score,
195  is_expert=is_expert)
196  truths_hist.xlabel = axis_label
197  truths_hist.title = formatter.format(plot_title, subplot_title='True distribution')
198 
199  self.plotsplots['truths'] = truths_hist
200 
201  # Estimates #
202 
203 
204  if "estimates" in which_plots:
205  # Distribution of estimates
206  estimates_hist_name = formatter.format(plot_name, subplot_name="estimates")
207  estimates_hist = ValidationPlot(estimates_hist_name, self.referenceFileNamereferenceFileName)
208  estimates_hist.hist(estimates,
209  outlier_z_score=outlier_z_score,
210  is_expert=is_expert)
211  estimates_hist.xlabel = axis_label
212  estimates_hist.title = formatter.format(plot_title, subplot_title='Estimates distribution')
213 
214  self.plotsplots['estimates'] = estimates_hist
215 
216  # Diagonal plots #
217 
218  if "diag_scatter" in which_plots:
219  # Estimates versus truths scatter plot
220  estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name="diag_scatter")
221  estimates_by_truths_scatter = ValidationPlot(estimates_by_truths_scatter_name, self.referenceFileNamereferenceFileName)
222  estimates_by_truths_scatter.scatter(truths,
223  estimates,
224  outlier_z_score=outlier_z_score,
225  is_expert=is_expert)
226  estimates_by_truths_scatter.xlabel = 'True ' + axis_label
227  estimates_by_truths_scatter.ylabel = 'Estimated ' + axis_label
228  estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title='Diagonal scatter plot')
229 
230  self.plotsplots['diag_scatter'] = estimates_by_truths_scatter
231 
232  if "diag_profile" in which_plots:
233  # Estimates versus truths profile plot
234  estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name="diag_profile")
235  estimates_by_truths_profile = ValidationPlot(estimates_by_truths_profile_name, self.referenceFileNamereferenceFileName)
236 
237  # Fill residuals and correct afterwards
238  estimates_by_truths_profile.profile(truths,
239  estimates - truths,
240  outlier_z_score=outlier_z_score,
241  gaus_z_score=4,
242  is_expert=is_expert)
243 
244  # Correct with TF1 - only works because the gaus fit is active.
245  hist = estimates_by_truths_profile.histograms[0]
246  GetBinContent = hist.GetBinContent
247  GetBinCenter = hist.GetBinCenter
248  SetBinContent = hist.SetBinContent
249  for i_bin in range(hist.GetNbinsX() + 2):
250  residual = GetBinContent(i_bin)
251  truth = GetBinCenter(i_bin)
252  if residual != 0:
253  SetBinContent(i_bin, residual + truth)
254 
255  # Reset maximum and minimum
256  estimates_by_truths_profile.histograms[0].SetMaximum()
257  estimates_by_truths_profile.histograms[0].SetMinimum()
258 
259  estimates_by_truths_profile.xlabel = 'True ' + axis_label
260  estimates_by_truths_profile.ylabel = 'Estimated ' + axis_label
261 
262  estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title='Diagonal profile')
263  estimates_by_truths_profile.fit_diag()
264 
265  self.plotsplots['diag_profile'] = estimates_by_truths_profile
266 
267  # Residuals #
268 
269  if "residuals" in which_plots:
270  # Distribution of the residuals
271  residuals_hist_name = formatter.format(plot_name, subplot_name="residuals")
272  residuals_hist = ValidationPlot(residuals_hist_name, self.referenceFileNamereferenceFileName)
273  residuals_hist.hist(residuals,
274  outlier_z_score=outlier_z_score,
275  is_expert=is_expert)
276  residuals_hist.xlabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unitunit)
277  residuals_hist.title = formatter.format(plot_title, subplot_title='Residual distribution')
278 
279  self.plotsplots['residuals'] = residuals_hist
280 
281  # Variances #
282 
283  if variances is not None and "sigmas" in which_plots:
284 
285  # Distribution of sigmas
286  sigmas_hist_name = formatter.format(plot_name, subplot_name="sigmas")
287  sigmas_hist = ValidationPlot(sigmas_hist_name, self.referenceFileNamereferenceFileName)
288  sigmas_hist.hist(sigmas,
289  lower_bound=0,
290  outlier_z_score=outlier_z_score,
291  is_expert=is_expert)
292  sigmas_hist.xlabel = compose_axis_label("#sigma (" + quantity_name + ')', self.unitunit)
293  sigmas_hist.title = formatter.format(plot_title, subplot_title='Estimated variance distribution')
294 
295  self.plotsplots['sigmas'] = sigmas_hist
296 
297  # Pulls #
298 
299  if variances is not None and "pulls" in which_plots:
300 
301  # Distribution of pulls
302  pulls_hist_name = formatter.format(plot_name, subplot_name="pulls")
303  pulls_hist = ValidationPlot(pulls_hist_name, self.referenceFileNamereferenceFileName)
304  pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
305  pulls_hist.xlabel = "pull (" + quantity_name + ")"
306  pulls_hist.title = formatter.format(plot_title, subplot_title='Pull distribution')
307  pulls_hist.fit_gaus(z_score=1)
308 
309  self.plotsplots['pulls'] = pulls_hist
310 
311  # P-Values #
312 
313  if variances is not None and "p_values" in which_plots:
314 
315  # Distribution of p_values
316  p_values_hist_name = formatter.format(plot_name, subplot_name="p-values")
317  p_values_hist = ValidationPlot(p_values_hist_name, self.referenceFileNamereferenceFileName)
318  p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
319  p_values_hist.xlabel = "p-value (" + quantity_name + ")"
320  p_values_hist.title = formatter.format(plot_title, subplot_title='P-value distribution')
321  p_values_hist.fit_const()
322 
323  self.plotsplots['p_values'] = p_values_hist
324 
325  # Auxialliary variables #
326  # ##################### #
327  for aux_name, aux_values in auxiliaries.items():
328  if "aux_residual_hist2d" in which_plots or "aux" in which_plots:
329  # Distribution of the residuals over auxiliary variable
330  aux_residuals_hist2d_name = formatter.format(plot_name,
331  subplot_name="residuals over {}".format(aux_name))
332  aux_residuals_hist2d = ValidationPlot(aux_residuals_hist2d_name, self.referenceFileNamereferenceFileName)
333  aux_residuals_hist2d.hist2d(aux_values,
334  residuals,
335  outlier_z_score=outlier_z_score,
336  allow_discrete=True,
337  is_expert=is_expert)
338  aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
339  aux_residuals_hist2d.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unitunit)
340  aux_residuals_hist2d.title = formatter.format(plot_title,
341  subplot_title='Residual distribution over {}'.format(aux_name))
342 
343  self.plotsplots['aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
344 
345  if "aux_residual_profile" in which_plots or "aux" in which_plots:
346  # Distribution of the residuals over auxiliary variable
347  aux_residuals_profile_name = formatter.format(plot_name,
348  subplot_name="residuals profile over {}".format(aux_name))
349  aux_residuals_profile = ValidationPlot(aux_residuals_profile_name, self.referenceFileNamereferenceFileName)
350  aux_residuals_profile.profile(aux_values,
351  residuals,
352  outlier_z_score=outlier_z_score,
353  gaus_z_score=1.5,
354  allow_discrete=True,
355  is_expert=is_expert,
356  )
357  aux_residuals_profile.xlabel = compose_axis_label(aux_name)
358  aux_residuals_profile.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unitunit)
359  aux_residuals_profile.title = formatter.format(plot_title,
360  subplot_title='Residual profile over {}'.format(aux_name))
361 
362  self.plotsplots['aux_residuals_profile_' + aux_name] = aux_residuals_profile
363 
364  if variances is not None and ("aux_pull_hist2d" in which_plots or "aux" in which_plots):
365  # Distribution of the pulls over auxiliary variable
366  aux_pulls_hist2d_name = formatter.format(plot_name,
367  subplot_name="pulls over {}".format(aux_name))
368  aux_pulls_hist2d = ValidationPlot(aux_pulls_hist2d_name, self.referenceFileNamereferenceFileName)
369  aux_pulls_hist2d.hist2d(aux_values,
370  pulls,
371  outlier_z_score=outlier_z_score,
372  allow_discrete=True,
373  is_expert=is_expert)
374  aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
375  aux_pulls_hist2d.ylabel = "pull (" + quantity_name + ")"
376  aux_pulls_hist2d.title = formatter.format(plot_title,
377  subplot_title='Pull scatter over {}'.format(aux_name))
378 
379  self.plotsplots['aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
380 
381  if variances is not None and ("aux_pull_profile" in which_plots or "aux" in which_plots):
382  # Distribution of the pulls over auxiliary variable
383  aux_pulls_profile_name = formatter.format(plot_name,
384  subplot_name="pull profile over {}".format(aux_name))
385  aux_pulls_profile = ValidationPlot(aux_pulls_profile_name, self.referenceFileNamereferenceFileName)
386  aux_pulls_profile.profile(aux_values,
387  pulls,
388  outlier_z_score=outlier_z_score,
389  gaus_z_score=1.5,
390  allow_discrete=True,
391  is_expert=is_expert)
392  aux_pulls_profile.xlabel = compose_axis_label(aux_name)
393  aux_pulls_profile.ylabel = "pull (" + quantity_name + ")"
394  aux_pulls_profile.title = formatter.format(plot_title,
395  subplot_title='Pull profile over {}'.format(aux_name))
396 
397  self.plotsplots['aux_pulls_profile_' + aux_name] = aux_pulls_profile
398 
399 
401 
402  @property
403  def contact(self):
404  """ returns the contact """
405  return self._contact_contact
406 
407  @contact.setter
408  def contact(self, contact):
409  """
410  sets the contact
411 
412  parameters:
413  contact: new contact information
414  """
415  self._contact_contact = contact
416  for validation_plot in list(self.plotsplots.values()):
417  validation_plot.contact = contact
418 
419  def write(self, tDirectory=None):
420  """ Write all validation plot to the given Root directory
421  parameters:
422  tDirectory - the root directory were to write to
423  """
424  for validation_plot in list(self.plotsplots.values()):
425  validation_plot.write(tDirectory)
def __init__(self, quantity_name, unit=None, outlier_z_score=None, absolute=False, contact='', plot_name=None, plot_title=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None # if set binnings of plots will be read from corresponding histograms)
Definition: pull.py:67
quantity_name
name of the quantity the analysis is performed on
Definition: pull.py:71
contact
Forward the contract to all plots by reassigning the contact.
Definition: pull.py:400
plot_title
title of the plot
Definition: pull.py:87
outlier_z_score
the outlier score defines in terms of how many std deviations a data point is considered as an outlie...
Definition: pull.py:77
plots
dictionary to store the plots
Definition: pull.py:99
float default_outlier_z_score
default outlier z score
Definition: pull.py:30
unit
unit the quanitity is given in
Definition: pull.py:73
def analyse(self, truths, estimates, variances=None, auxiliaries={}, which_plots=None, is_expert=None)
Definition: pull.py:112
def write(self, tDirectory=None)
Definition: pull.py:419
_contact
contact information
Definition: pull.py:97
plot_name_postfix
post fix to be append after the plot name
Definition: pull.py:92
def contact(self, contact)
Definition: pull.py:408
plot_title_postfix
postfix to be appended after the title
Definition: pull.py:94
plot_name_prefix
prefix to be prepended to the plot name
Definition: pull.py:90
string default_plot_title
default plot title
Definition: pull.py:34
bool default_is_expert
if true the plots created here are declared as expert plots in the validation
Definition: pull.py:52
list default_which_plots
default list of plots to be created in this analysis
Definition: pull.py:36
referenceFileName
name of the reference file, if set the binnings of plots will be read from the corresponding object i...
Definition: pull.py:102
string default_plot_name
default plot name
Definition: pull.py:32
absolute
if true only the absolute value is compared
Definition: pull.py:82