Belle II Software  release-05-02-19
resolution.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 from .plot import ValidationPlot, compose_axis_label, get_unit
5 
6 # get error function as a np.ufunc vectorised for numpy array
7 from .utilities import erf
8 from tracking.root_utils import root_save_name
9 
10 import math
11 import collections
12 
13 import numpy as np
14 
15 from .tolerate_missing_key_formatter import TolerateMissingKeyFormatter
16 
# Module-level formatter used by ResolutionAnalysis.analyse to fill the
# plot name and plot title templates in several passes.
# NOTE(review): presumably placeholders without a supplied key are left
# untouched so a later pass can fill them - confirm in
# tolerate_missing_key_formatter.
formatter = TolerateMissingKeyFormatter()
18 
19 
class ResolutionAnalysis(object):
    """Perform a binned resolution analysis of an estimated quantity.

    For every bin of a binning variable the residuals (estimate - truth) are
    histogrammed and, if the bin holds enough entries, fitted with a
    Gaussian.  The fitted widths are collected into one resolution graph
    plotted against the bin centers.
    """

    # default Z-score (for outlier detection)
    default_outlier_z_score = 5.0

    # default minimum number of entries
    default_min_required_entries = 50

    # default plot name
    default_plot_name = "{plot_name_prefix}_{subplot_name}{plot_name_postfix}"

    # default plot title
    default_plot_title = "{subplot_title} of {quantity_name}{plot_title_postfix}"

    # default list of plots to create
    default_which_plots = [
        "resolution",
    ]

    # by default, create expert plots
    default_is_expert = True

    def __init__(
        self,
        quantity_name,
        bin_spacing,  # can be [0,0.5,1.0] will have 2 bins
        bin_name,
        bin_unit=None,
        unit=None,
        outlier_z_score=None,
        contact='',
        plot_name=None,
        plot_title=None,
        min_required_entries=None,  # minimum number of entries in a bin for the resolution fit
        plot_name_prefix='',  # deprecated, use plot_name instead
        plot_name_postfix='',  # deprecated, use plot_name instead
        plot_title_postfix='',  # deprecated, use plot_title instead
        referenceFileName=None,  # if set binnings of histograms will be read from corresponding histogram in this file
    ):
        """Performs a comparison of an estimated quantity to their truths by generating standardized validation plots.

        Parameters
        ----------
        quantity_name : str
            Name of the quantity whose resolution is analysed.
        bin_spacing : sequence of float
            Ascending bin edges; ``n`` edges define ``n - 1`` bins.
        bin_name : str
            Name of the binning variable (x axis label of the resolution graph).
        bin_unit : str, optional
            Unit of the binning variable.
        unit : str, optional
            Unit of the quantity; looked up with ``get_unit`` if not given.
        outlier_z_score : float, optional
            Z-score passed to the residual histograms; defaults to
            ``default_outlier_z_score``.
        contact : str, optional
            Contact person forwarded to all plots.
        plot_name : str, optional
            Template of the plot names; defaults to ``default_plot_name``.
        plot_title : str, optional
            Template of the plot titles; defaults to ``default_plot_title``.
        min_required_entries : int, optional
            Minimum number of histogram entries required to fit a bin;
            defaults to ``default_min_required_entries``.
        plot_name_prefix, plot_name_postfix, plot_title_postfix : str, optional
            Deprecated pieces inserted into the name and title templates.
        referenceFileName : str, optional
            Reference file handed to every ``ValidationPlot``.
        """
        # cached name of the quantity in the resolution analysis
        self.quantity_name = quantity_name
        # cached measurement unit for this resolution analysis
        self.unit = unit or get_unit(quantity_name)
        # cached value of the histogram bin spacing
        self.bin_spacing = bin_spacing
        # cached value of the bin name
        self.bin_name = bin_name
        # cached value of the bin measurement unit
        self.bin_unit = bin_unit

        # cached value of the Z-score (for outlier detection)
        if outlier_z_score is None:
            self.outlier_z_score = self.default_outlier_z_score
        else:
            self.outlier_z_score = outlier_z_score

        # cached value of the minimum number of entries
        if min_required_entries is None:
            self.min_required_entries = self.default_min_required_entries
        else:
            self.min_required_entries = min_required_entries

        # cached value of the base name of the plot
        self.plot_name = plot_name
        # cached value of the plot title
        self.plot_title = plot_title

        # cached value of the prefix prepended to the plot name
        self.plot_name_prefix = plot_name_prefix or root_save_name(quantity_name)
        # cached value of the suffix appended to the plot name
        self.plot_name_postfix = plot_name_postfix
        # cached value of the suffix appended to the plot title
        self.plot_title_postfix = plot_title_postfix

        # cached value of the contact person; assigned directly because the
        # property setter iterates over self.plots, which does not exist yet
        self._contact = contact
        # cached value of the dictionary of plots to be created
        self.plots = collections.OrderedDict()

        # cached value of the reference filename
        self.referenceFileName = referenceFileName

    def analyse(
        self,
        bin_values,
        truths,
        estimates,
        which_plots=None,
        is_expert=None
    ):
        """Compares the concrete estimate to the truth and generates plots of the resolution

        Parameters
        ----------
        bin_values : array_like(float)
            The parameter used for binning
        truths : array_like(float)
            Sample of the true values
        estimates : array_like(float)
            Corresponding estimations
        which_plots : list(str), optional
            Names of the plots to create; only "resolution" is handled.
            Defaults to ``default_which_plots``.
        is_expert : bool, optional
            Expert flag forwarded to the plots.
            Defaults to ``default_is_expert``.
        """
        if is_expert is None:
            is_expert = self.default_is_expert

        if which_plots is None:
            which_plots = self.default_which_plots

        quantity_name = self.quantity_name
        axis_label = compose_axis_label(quantity_name, self.unit)
        outlier_z_score = self.outlier_z_score

        plot_name = self.plot_name
        if plot_name is None:
            plot_name = self.default_plot_name

        # first formatting pass: subplot_name is filled in later per plot
        plot_name = formatter.format(plot_name,
                                     quantity_name=quantity_name,
                                     plot_name_prefix=self.plot_name_prefix,
                                     plot_name_postfix=self.plot_name_postfix)

        plot_title = self.plot_title
        if plot_title is None:
            plot_title = self.default_plot_title

        # first formatting pass: subplot_title is filled in later per plot
        plot_title = formatter.format(plot_title,
                                      quantity_name=quantity_name,
                                      plot_title_postfix=self.plot_title_postfix)

        # compute residuals
        residuals = estimates - truths

        # Resolution #

        if "resolution" in which_plots:
            # one entry per bin: (lower, upper, center, sigma, sigma error)
            resolution_values = []

            # creating plots for all configured bins
            for lower_bin, upper_bin in zip(self.bin_spacing[:-1], self.bin_spacing[1:]):
                assert (lower_bin < upper_bin)
                bin_center = lower_bin + (upper_bin - lower_bin) / 2.0
                assert (len(bin_values) == len(residuals))

                # compile the residuals whose bin value lies in [lower, upper)
                sel_residuals = collections.deque(
                    residual
                    for bin_value, residual in zip(bin_values, residuals)
                    if lower_bin <= bin_value < upper_bin
                )

                residuals_hist_name = formatter.format(plot_name, subplot_name="residuals") + \
                    "{}_to_{}".format(lower_bin, upper_bin)
                vplot = ValidationPlot(residuals_hist_name, self.referenceFileName)
                vplot.hist(sel_residuals,
                           outlier_z_score=outlier_z_score,
                           is_expert=is_expert)
                vplot.xlabel = compose_axis_label("#Delta " + quantity_name + " (estimate - truth)", self.unit)
                vplot.title = formatter.format(plot_title, subplot_title='Residual distribution')

                # these values will stay None if no fit could be performed
                gaus_sigma = None
                gaus_sigma_err = None

                # check if the minimum number of entries are in the histogram
                if vplot.histograms[0].GetEntries() >= self.min_required_entries:
                    fit_res = vplot.fit_gaus(z_score=1)

                    # extract the width and its error from the fit result
                    gaus_sigma = fit_res.GetParams()[2]
                    gaus_sigma_err = fit_res.Errors()[2]

                self.plots['residuals' + residuals_hist_name] = vplot

                # store the fit results
                resolution_values.append((lower_bin, upper_bin, bin_center, gaus_sigma, gaus_sigma_err))

            resolution_graph_name = formatter.format(plot_name, subplot_name="resolution")
            resolution_graph = ValidationPlot(resolution_graph_name, self.referenceFileName)

            # compile all required data going into the final graph with errors
            xs = []
            xs_err = []
            ys = []
            ys_err = []

            for _, _, center, sigma, sigma_err in resolution_values:
                # sigma is None if no fit was possible for this bin
                if sigma is not None:
                    xs.append(center)
                    # every point needs its own error entry, so append
                    # instead of rebinding the list
                    xs_err.append(0.0)
                    ys.append(sigma)
                    ys_err.append(sigma_err)

            # convert to numpy array before giving to the plotting code
            resolution_graph.grapherrors((np.array(xs), np.array(xs_err)), (np.array(ys), np.array(ys_err)),
                                         is_expert=is_expert)
            resolution_graph.xlabel = compose_axis_label(self.bin_name, self.bin_unit)
            resolution_graph.ylabel = axis_label
            resolution_graph.title = formatter.format(plot_title, subplot_title='Resolution')

            self.plots[resolution_graph_name] = resolution_graph

        # Forward the contact to all plots by reassigning the contact.
        self.contact = self.contact

    @property
    def contact(self):
        """Get the contact person's name"""
        return self._contact

    @contact.setter
    def contact(self, contact):
        """Set the contact person's name and forward it to all stored plots"""
        self._contact = contact
        for validation_plot in self.plots.values():
            validation_plot.contact = contact

    def write(self, tDirectory=None):
        """Write all validation plots to the given Root directory"""
        for validation_plot in self.plots.values():
            validation_plot.write(tDirectory)
tracking.validation.resolution.ResolutionAnalysis.default_min_required_entries
int default_min_required_entries
default minimum number of entries
Definition: resolution.py:26
tracking.validation.resolution.ResolutionAnalysis.plot_title_postfix
plot_title_postfix
cached value of the suffix appended to the plot title
Definition: resolution.py:74
tracking.validation.resolution.ResolutionAnalysis.analyse
def analyse(self, bin_values, truths, estimates, which_plots=None, is_expert=None)
Definition: resolution.py:100
tracking.validation.resolution.ResolutionAnalysis.plot_name
plot_name
cached value of the base name of the plot
Definition: resolution.py:65
tracking.validation.resolution.ResolutionAnalysis.default_plot_name
string default_plot_name
default plot name
Definition: resolution.py:28
tracking.validation.resolution.ResolutionAnalysis.plot_title
plot_title
cached value of the plot title
Definition: resolution.py:67
tracking.validation.resolution.ResolutionAnalysis.default_is_expert
bool default_is_expert
by default, create expert plots
Definition: resolution.py:37
tracking.validation.resolution.ResolutionAnalysis.__init__
def __init__(self, quantity_name, bin_spacing, bin_name, bin_unit=None, unit=None, outlier_z_score=None, contact='', plot_name=None, plot_title=None, min_required_entries=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None)
Definition: resolution.py:39
tracking.validation.resolution.ResolutionAnalysis.min_required_entries
min_required_entries
cached value of the minimum number of entries
Definition: resolution.py:60
tracking.validation.resolution.ResolutionAnalysis.write
def write(self, tDirectory=None)
Definition: resolution.py:247
tracking.validation.resolution.ResolutionAnalysis.bin_spacing
bin_spacing
cached value of the histogram bin spacing
Definition: resolution.py:47
tracking.validation.resolution.ResolutionAnalysis.default_plot_title
string default_plot_title
default plot title
Definition: resolution.py:30
tracking.validation.resolution.ResolutionAnalysis
Definition: resolution.py:20
tracking.validation.resolution.ResolutionAnalysis.contact
contact
Forward the contact to all plots by reassigning the contact.
Definition: resolution.py:226
tracking.validation.resolution.ResolutionAnalysis.bin_unit
bin_unit
cached value of the bin measurement unit
Definition: resolution.py:51
tracking.validation.resolution.ResolutionAnalysis._contact
_contact
cached value of the contact person
Definition: resolution.py:77
tracking.validation.resolution.ResolutionAnalysis.default_which_plots
list default_which_plots
default list of plots to create
Definition: resolution.py:32
tracking.validation.resolution.ResolutionAnalysis.plot_name_prefix
plot_name_prefix
cached value of the prefix prepended to the plot name
Definition: resolution.py:70
tracking.root_utils
Definition: root_utils.py:1
tracking.validation.resolution.ResolutionAnalysis.unit
unit
cached measurement unit for this resolution analysis
Definition: resolution.py:45
tracking.validation.resolution.ResolutionAnalysis.outlier_z_score
outlier_z_score
cached value of the Z-score (for outlier detection)
Definition: resolution.py:55
tracking.validation.plot.ValidationPlot
Definition: plot.py:152
tracking.validation.resolution.ResolutionAnalysis.plots
plots
cached value of the dictionary of plots to be created
Definition: resolution.py:79
tracking.validation.resolution.ResolutionAnalysis.default_outlier_z_score
float default_outlier_z_score
default Z-score (for outlier detection)
Definition: resolution.py:24
tracking.validation.resolution.ResolutionAnalysis.quantity_name
quantity_name
cached name of the quantity in the resolution analysis
Definition: resolution.py:43
tracking.validation.resolution.ResolutionAnalysis.bin_name
bin_name
cached value of the bin name
Definition: resolution.py:49
tracking.validation.resolution.ResolutionAnalysis.plot_name_postfix
plot_name_postfix
cached value of the suffix appended to the plot name
Definition: resolution.py:72
tracking.validation.resolution.ResolutionAnalysis.referenceFileName
referenceFileName
cached value of the reference filename
Definition: resolution.py:82