Belle II Software  release-05-02-19
pull.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 from .plot import ValidationPlot, compose_axis_label, get_unit
5 
6 # get error function as a np.ufunc vectorised for numpy array
7 from .utilities import erf
8 from tracking.root_utils import root_save_name
9 
10 import math
11 import collections
12 
13 import numpy as np
14 
15 from .tolerate_missing_key_formatter import TolerateMissingKeyFormatter
16 
17 formatter = TolerateMissingKeyFormatter()
18 
19 
20 class PullAnalysis(object):
21  """Performs a comparision of an estimated quantity to their truths by generating standardized validation plots."""
22 
23  default_outlier_z_score = 5.0
24 
25  default_plot_name = "{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
26 
27  default_plot_title = "{subplot_title} of {quantity_name}{plot_title_postfix}"
28 
29  default_which_plots = [
30  "truths",
31  "estimates",
32  "diag_profile",
33  "diag_scatter",
34  "residuals",
35  "sigmas",
36  "pulls",
37  "p_values",
38  "aux_residual_hist2d",
39  "aux_residual_profile",
40  "aux_pull_hist2d",
41  "aux_pull_profile",
42  ]
43 
44 
45  default_is_expert = True
46 
47  def __init__(
48  self,
49  quantity_name,
50  unit=None,
51  outlier_z_score=None,
52  absolute=False,
53  contact='',
54  plot_name=None,
55  plot_title=None,
56  plot_name_prefix='', # depricated use plot_name instead
57  plot_name_postfix='', # depricated use plot_name instead
58  plot_title_postfix='', # depricated use plot_title instead
59  referenceFileName=None # if set binnings of plots will be read from corresponding histograms
60  ):
61  """Performs a comparision of an estimated quantity to their truths by generating standardized validation plots."""
62 
63 
64  self.quantity_name = quantity_name
65 
66  self.unit = unit or get_unit(quantity_name)
67 
68 
69  if outlier_z_score is None:
71  else:
72  self.outlier_z_score = outlier_z_score
73 
74 
75  self.absolute = absolute
76 
77 
78  self.plot_name = plot_name
79 
80  self.plot_title = plot_title
81 
82 
83  self.plot_name_prefix = plot_name_prefix or root_save_name(quantity_name)
84 
85  self.plot_name_postfix = plot_name_postfix
86 
87  self.plot_title_postfix = plot_title_postfix
88 
89 
90  self._contact = contact
91 
92  self.plots = collections.OrderedDict()
93 
94 
95  self.referenceFileName = referenceFileName
96 
97  def analyse(
98  self,
99  truths,
100  estimates,
101  variances=None,
102  auxiliaries={},
103  which_plots=None,
104  is_expert=None
105  ):
106  """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
107  Close indicates if the figure shall be closed after they are saved.
108 
109  Parameters
110  ----------
111  truths : array_like(float)
112  Sample of the true values
113  estimates : array_like(float)
114  Corresponding estimations
115  variances : array_like(float), optional
116  Corresponding variance estimations
117  auxiliaries : Dict[name, array_like(float)], optional
118  Auxiliary variable to show distribution of residuals and pull as function
119  selected_plots : list(str), optional
120  List of analysis plots to be generated. All if not given.
121  Currently valid names are
122  truths, estimates, diag_profile, diag_scatter, residuals,
123  sigmas, pulls, p_values
124  """
125 
126  if is_expert is None:
127  is_expert = self.default_is_expert
128 
129  if which_plots is None:
130  which_plots = self.default_which_plots
131 
132  quantity_name = self.quantity_name
133 
134  axis_label = compose_axis_label(quantity_name, self.unit)
135 
136  plot_name_prefix = self.plot_name_prefix
137  outlier_z_score = self.outlier_z_score
138 
139  absolute = self.absolute
140  # Compare only the absolute value by taking the absolute of the curvature truth
141  # and flip the sign of the estimate
142  if absolute:
143  absolute_truths = truths.copy()
144  absolute_estimates = estimates.copy()
145 
146  flip_sign_for = truths < 0
147  absolute_truths[flip_sign_for] = -truths[flip_sign_for]
148  absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
149 
150  truths = absolute_truths
151  estimates = absolute_estimates
152 
153  quantity_name = 'absolute ' + quantity_name
154 
155  residuals = estimates - truths
156 
157  if variances is not None:
158  sigmas = np.sqrt(variances)
159  pulls = np.divide(residuals, sigmas)
160  p_values = 1.0 - erf(np.abs(pulls))
161 
162  plot_name = self.plot_name
163  if plot_name is None:
164  plot_name = self.default_plot_name
165 
166  plot_name = formatter.format(plot_name,
167  quantity_name=quantity_name,
168  plot_name_prefix=plot_name_prefix,
169  plot_name_postfix=self.plot_name_postfix)
170 
171  plot_title = self.plot_title
172  if plot_title is None:
173  plot_title = self.default_plot_title
174 
175  plot_title = formatter.format(plot_title,
176  quantity_name=quantity_name,
177  plot_title_postfix=self.plot_title_postfix)
178 
179  # Truths #
180 
181 
182  if "truths" in which_plots:
183  # Distribution of truths
184  truths_hist_name = formatter.format(plot_name, subplot_name="truths")
185  truths_hist = ValidationPlot(truths_hist_name, self.referenceFileName)
186  truths_hist.hist(truths,
187  outlier_z_score=outlier_z_score,
188  is_expert=is_expert)
189  truths_hist.xlabel = axis_label
190  truths_hist.title = formatter.format(plot_title, subplot_title='True distribution')
191 
192  self.plots['truths'] = truths_hist
193 
194  # Estimates #
195 
196 
197  if "estimates" in which_plots:
198  # Distribution of estimates
199  estimates_hist_name = formatter.format(plot_name, subplot_name="estimates")
200  estimates_hist = ValidationPlot(estimates_hist_name, self.referenceFileName)
201  estimates_hist.hist(estimates,
202  outlier_z_score=outlier_z_score,
203  is_expert=is_expert)
204  estimates_hist.xlabel = axis_label
205  estimates_hist.title = formatter.format(plot_title, subplot_title='Estimates distribution')
206 
207  self.plots['estimates'] = estimates_hist
208 
209  # Diagonal plots #
210 
211  if "diag_scatter" in which_plots:
212  # Estimates versus truths scatter plot
213  estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name="diag_scatter")
214  estimates_by_truths_scatter = ValidationPlot(estimates_by_truths_scatter_name, self.referenceFileName)
215  estimates_by_truths_scatter.scatter(truths,
216  estimates,
217  outlier_z_score=outlier_z_score,
218  is_expert=is_expert)
219  estimates_by_truths_scatter.xlabel = 'True ' + axis_label
220  estimates_by_truths_scatter.ylabel = 'Estimated ' + axis_label
221  estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title='Diagonal scatter plot')
222 
223  self.plots['diag_scatter'] = estimates_by_truths_scatter
224 
225  if "diag_profile" in which_plots:
226  # Estimates versus truths profile plot
227  estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name="diag_profile")
228  estimates_by_truths_profile = ValidationPlot(estimates_by_truths_profile_name, self.referenceFileName)
229 
230  # Fill residuals and correct afterwards
231  estimates_by_truths_profile.profile(truths,
232  estimates - truths,
233  outlier_z_score=outlier_z_score,
234  gaus_z_score=4,
235  is_expert=is_expert)
236 
237  # Correct with TF1 - only works because the gaus fit is active.
238  hist = estimates_by_truths_profile.histograms[0]
239  GetBinContent = hist.GetBinContent
240  GetBinCenter = hist.GetBinCenter
241  SetBinContent = hist.SetBinContent
242  for i_bin in range(hist.GetNbinsX() + 2):
243  residual = GetBinContent(i_bin)
244  truth = GetBinCenter(i_bin)
245  if residual != 0:
246  SetBinContent(i_bin, residual + truth)
247 
248  # Reset maximum and minimum
249  estimates_by_truths_profile.histograms[0].SetMaximum()
250  estimates_by_truths_profile.histograms[0].SetMinimum()
251 
252  estimates_by_truths_profile.xlabel = 'True ' + axis_label
253  estimates_by_truths_profile.ylabel = 'Estimated ' + axis_label
254 
255  estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title='Diagonal profile')
256  estimates_by_truths_profile.fit_diag()
257 
258  self.plots['diag_profile'] = estimates_by_truths_profile
259 
260  # Residuals #
261 
262  if "residuals" in which_plots:
263  # Distribution of the residuals
264  residuals_hist_name = formatter.format(plot_name, subplot_name="residuals")
265  residuals_hist = ValidationPlot(residuals_hist_name, self.referenceFileName)
266  residuals_hist.hist(residuals,
267  outlier_z_score=outlier_z_score,
268  is_expert=is_expert)
269  residuals_hist.xlabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
270  residuals_hist.title = formatter.format(plot_title, subplot_title='Residual distribution')
271 
272  self.plots['residuals'] = residuals_hist
273 
274  # Variances #
275 
276  if variances is not None and "sigmas" in which_plots:
277 
278  # Distribution of sigmas
279  sigmas_hist_name = formatter.format(plot_name, subplot_name="sigmas")
280  sigmas_hist = ValidationPlot(sigmas_hist_name, self.referenceFileName)
281  sigmas_hist.hist(sigmas,
282  lower_bound=0,
283  outlier_z_score=outlier_z_score,
284  is_expert=is_expert)
285  sigmas_hist.xlabel = compose_axis_label("#sigma (" + quantity_name + ')', self.unit)
286  sigmas_hist.title = formatter.format(plot_title, subplot_title='Estimated variance distribution')
287 
288  self.plots['sigmas'] = sigmas_hist
289 
290  # Pulls #
291 
292  if variances is not None and "pulls" in which_plots:
293 
294  # Distribution of pulls
295  pulls_hist_name = formatter.format(plot_name, subplot_name="pulls")
296  pulls_hist = ValidationPlot(pulls_hist_name, self.referenceFileName)
297  pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
298  pulls_hist.xlabel = "pull (" + quantity_name + ")"
299  pulls_hist.title = formatter.format(plot_title, subplot_title='Pull distribution')
300  pulls_hist.fit_gaus(z_score=1)
301 
302  self.plots['pulls'] = pulls_hist
303 
304  # P-Values #
305 
306  if variances is not None and "p_values" in which_plots:
307 
308  # Distribution of p_values
309  p_values_hist_name = formatter.format(plot_name, subplot_name="p-values")
310  p_values_hist = ValidationPlot(p_values_hist_name, self.referenceFileName)
311  p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
312  p_values_hist.xlabel = "p-value (" + quantity_name + ")"
313  p_values_hist.title = formatter.format(plot_title, subplot_title='P-value distribution')
314  p_values_hist.fit_const()
315 
316  self.plots['p_values'] = p_values_hist
317 
318  # Auxialliary variables #
319  # ##################### #
320  for aux_name, aux_values in auxiliaries.items():
321  if "aux_residual_hist2d" in which_plots or "aux" in which_plots:
322  # Distribution of the residuals over auxiliary variable
323  aux_residuals_hist2d_name = formatter.format(plot_name,
324  subplot_name="residuals over {}".format(aux_name))
325  aux_residuals_hist2d = ValidationPlot(aux_residuals_hist2d_name, self.referenceFileName)
326  aux_residuals_hist2d.hist2d(aux_values,
327  residuals,
328  outlier_z_score=outlier_z_score,
329  allow_discrete=True,
330  is_expert=is_expert)
331  aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
332  aux_residuals_hist2d.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
333  aux_residuals_hist2d.title = formatter.format(plot_title,
334  subplot_title='Residual distribution over {}'.format(aux_name))
335 
336  self.plots['aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
337 
338  if "aux_residual_profile" in which_plots or "aux" in which_plots:
339  # Distribution of the residuals over auxiliary variable
340  aux_residuals_profile_name = formatter.format(plot_name,
341  subplot_name="residuals profile over {}".format(aux_name))
342  aux_residuals_profile = ValidationPlot(aux_residuals_profile_name, self.referenceFileName)
343  aux_residuals_profile.profile(aux_values,
344  residuals,
345  outlier_z_score=outlier_z_score,
346  gaus_z_score=1.5,
347  allow_discrete=True,
348  is_expert=is_expert,
349  )
350  aux_residuals_profile.xlabel = compose_axis_label(aux_name)
351  aux_residuals_profile.ylabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
352  aux_residuals_profile.title = formatter.format(plot_title,
353  subplot_title='Residual profile over {}'.format(aux_name))
354 
355  self.plots['aux_residuals_profile_' + aux_name] = aux_residuals_profile
356 
357  if variances is not None and ("aux_pull_hist2d" in which_plots or "aux" in which_plots):
358  # Distribution of the pulls over auxiliary variable
359  aux_pulls_hist2d_name = formatter.format(plot_name,
360  subplot_name="pulls over {}".format(aux_name))
361  aux_pulls_hist2d = ValidationPlot(aux_pulls_hist2d_name, self.referenceFileName)
362  aux_pulls_hist2d.hist2d(aux_values,
363  pulls,
364  outlier_z_score=outlier_z_score,
365  allow_discrete=True,
366  is_expert=is_expert)
367  aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
368  aux_pulls_hist2d.ylabel = "pull (" + quantity_name + ")"
369  aux_pulls_hist2d.title = formatter.format(plot_title,
370  subplot_title='Pull scatter over {}'.format(aux_name))
371 
372  self.plots['aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
373 
374  if variances is not None and ("aux_pull_profile" in which_plots or "aux" in which_plots):
375  # Distribution of the pulls over auxiliary variable
376  aux_pulls_profile_name = formatter.format(plot_name,
377  subplot_name="pull profile over {}".format(aux_name))
378  aux_pulls_profile = ValidationPlot(aux_pulls_profile_name, self.referenceFileName)
379  aux_pulls_profile.profile(aux_values,
380  pulls,
381  outlier_z_score=outlier_z_score,
382  gaus_z_score=1.5,
383  allow_discrete=True,
384  is_expert=is_expert)
385  aux_pulls_profile.xlabel = compose_axis_label(aux_name)
386  aux_pulls_profile.ylabel = "pull (" + quantity_name + ")"
387  aux_pulls_profile.title = formatter.format(plot_title,
388  subplot_title='Pull profile over {}'.format(aux_name))
389 
390  self.plots['aux_pulls_profile_' + aux_name] = aux_pulls_profile
391 
392 
393  self.contact = self.contact
394 
395  @property
396  def contact(self):
397  """ returns the contact """
398  return self._contact
399 
400  @contact.setter
401  def contact(self, contact):
402  """
403  sets the contact
404 
405  parameters:
406  contact: new contact information
407  """
408  self._contact = contact
409  for validation_plot in list(self.plots.values()):
410  validation_plot.contact = contact
411 
412  def write(self, tDirectory=None):
413  """ Write all validation plot to the given Root directory
414  parameters:
415  tDirectory - the root directory were to write to
416  """
417  for validation_plot in list(self.plots.values()):
418  validation_plot.write(tDirectory)
tracking.validation.pull.PullAnalysis.plot_name
plot_name
name of the plot
Definition: pull.py:65
tracking.validation.pull.PullAnalysis.default_which_plots
list default_which_plots
default list of plots to be created in this analysis
Definition: pull.py:29
tracking.validation.pull.PullAnalysis.contact
contact
Forward the contract to all plots by reassigning the contact.
Definition: pull.py:385
tracking.validation.pull.PullAnalysis.plot_name_postfix
plot_name_postfix
post fix to be append after the plot name
Definition: pull.py:72
tracking.validation.pull.PullAnalysis
Definition: pull.py:20
tracking.validation.pull.PullAnalysis.plot_name_prefix
plot_name_prefix
prefix to be prepended to the plot name
Definition: pull.py:70
tracking.validation.pull.PullAnalysis.absolute
absolute
if true only the absolute value is compared
Definition: pull.py:62
tracking.validation.pull.PullAnalysis.quantity_name
quantity_name
name of the quantity the analysis is performed on
Definition: pull.py:51
tracking.validation.pull.PullAnalysis.unit
unit
unit the quanitity is given in
Definition: pull.py:53
tracking.validation.pull.PullAnalysis._contact
_contact
contact information
Definition: pull.py:77
tracking.validation.pull.PullAnalysis.default_is_expert
bool default_is_expert
if true the plots created here are declared as expert plots in the validation
Definition: pull.py:45
tracking.validation.pull.PullAnalysis.default_outlier_z_score
float default_outlier_z_score
default outlier z score
Definition: pull.py:23
tracking.root_utils
Definition: root_utils.py:1
tracking.validation.pull.PullAnalysis.default_plot_title
string default_plot_title
default plot title
Definition: pull.py:27
tracking.validation.pull.PullAnalysis.__init__
def __init__(self, quantity_name, unit=None, outlier_z_score=None, absolute=False, contact='', plot_name=None, plot_title=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None # if set binnings of plots will be read from corresponding histograms)
Definition: pull.py:47
tracking.validation.pull.PullAnalysis.analyse
def analyse(self, truths, estimates, variances=None, auxiliaries={}, which_plots=None, is_expert=None)
Definition: pull.py:97
tracking.validation.pull.PullAnalysis.write
def write(self, tDirectory=None)
Definition: pull.py:412
tracking.validation.plot.ValidationPlot
Definition: plot.py:152
tracking.validation.pull.PullAnalysis.plot_title
plot_title
title of the plot
Definition: pull.py:67
tracking.validation.pull.PullAnalysis.default_plot_name
string default_plot_name
default plot name
Definition: pull.py:25
tracking.validation.pull.PullAnalysis.outlier_z_score
outlier_z_score
the outlier score defines in terms of how many std deviations a data point is considered as an outlie...
Definition: pull.py:57
tracking.validation.pull.PullAnalysis.plots
plots
dictionary to store the plots
Definition: pull.py:79
tracking.validation.pull.PullAnalysis.referenceFileName
referenceFileName
name of the reference file, if set the binnings of plots will be read from the corresponding object i...
Definition: pull.py:82
tracking.validation.pull.PullAnalysis.plot_title_postfix
plot_title_postfix
postfix to be appended after the title
Definition: pull.py:74