4 from .plot
import ValidationPlot, compose_axis_label, get_unit
7 from .utilities
import erf
15 from .tolerate_missing_key_formatter
import TolerateMissingKeyFormatter
17 formatter = TolerateMissingKeyFormatter()
21 """Performs a comparison of an estimated quantity to its truth by generating standardized validation plots."""
23 default_outlier_z_score = 5.0
25 default_plot_name =
"{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
27 default_plot_title =
"{subplot_title} of {quantity_name}{plot_title_postfix}"
29 default_which_plots = [
38 "aux_residual_hist2d",
39 "aux_residual_profile",
45 default_is_expert =
True
58 plot_title_postfix='',
59 referenceFileName=None
61 """Performs a comparison of an estimated quantity to its truth by generating standardized validation plots."""
66 self.
unit = unit
or get_unit(quantity_name)
69 if outlier_z_score
is None:
92 self.
plots = collections.OrderedDict()
106 """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
107 Close indicates whether the figures shall be closed after they are saved.
111 truths : array_like(float)
112 Sample of the true values
113 estimates : array_like(float)
114 Corresponding estimations
115 variances : array_like(float), optional
116 Corresponding variance estimations
117 auxiliaries : Dict[name, array_like(float)], optional
118 Auxiliary variables as a function of which the distributions of residuals and pulls are shown
119 which_plots : list(str), optional
120 List of analysis plots to be generated. All if not given.
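121 Currently valid names are
122 truths, estimates, diag_profile, diag_scatter, residuals,
123 sigmas, pulls, p_values, aux_residual_hist2d, aux_residual_profile, aux_pull_hist2d, aux_pull_profile, and aux (selects all auxiliary plots)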
126 if is_expert
is None:
129 if which_plots
is None:
134 axis_label = compose_axis_label(quantity_name, self.
unit)
143 absolute_truths = truths.copy()
144 absolute_estimates = estimates.copy()
146 flip_sign_for = truths < 0
147 absolute_truths[flip_sign_for] = -truths[flip_sign_for]
148 absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
150 truths = absolute_truths
151 estimates = absolute_estimates
153 quantity_name =
'absolute ' + quantity_name
155 residuals = estimates - truths
157 if variances
is not None:
158 sigmas = np.sqrt(variances)
159 pulls = np.divide(residuals, sigmas)
160 p_values = 1.0 - erf(np.abs(pulls))
163 if plot_name
is None:
166 plot_name = formatter.format(plot_name,
167 quantity_name=quantity_name,
168 plot_name_prefix=plot_name_prefix,
172 if plot_title
is None:
175 plot_title = formatter.format(plot_title,
176 quantity_name=quantity_name,
182 if "truths" in which_plots:
184 truths_hist_name = formatter.format(plot_name, subplot_name=
"truths")
186 truths_hist.hist(truths,
187 outlier_z_score=outlier_z_score,
189 truths_hist.xlabel = axis_label
190 truths_hist.title = formatter.format(plot_title, subplot_title=
'True distribution')
192 self.
plots[
'truths'] = truths_hist
197 if "estimates" in which_plots:
199 estimates_hist_name = formatter.format(plot_name, subplot_name=
"estimates")
201 estimates_hist.hist(estimates,
202 outlier_z_score=outlier_z_score,
204 estimates_hist.xlabel = axis_label
205 estimates_hist.title = formatter.format(plot_title, subplot_title=
'Estimates distribution')
207 self.
plots[
'estimates'] = estimates_hist
211 if "diag_scatter" in which_plots:
213 estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name=
"diag_scatter")
215 estimates_by_truths_scatter.scatter(truths,
217 outlier_z_score=outlier_z_score,
219 estimates_by_truths_scatter.xlabel =
'True ' + axis_label
220 estimates_by_truths_scatter.ylabel =
'Estimated ' + axis_label
221 estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title=
'Diagonal scatter plot')
223 self.
plots[
'diag_scatter'] = estimates_by_truths_scatter
225 if "diag_profile" in which_plots:
227 estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name=
"diag_profile")
231 estimates_by_truths_profile.profile(truths,
233 outlier_z_score=outlier_z_score,
238 hist = estimates_by_truths_profile.histograms[0]
239 GetBinContent = hist.GetBinContent
240 GetBinCenter = hist.GetBinCenter
241 SetBinContent = hist.SetBinContent
242 for i_bin
in range(hist.GetNbinsX() + 2):
243 residual = GetBinContent(i_bin)
244 truth = GetBinCenter(i_bin)
246 SetBinContent(i_bin, residual + truth)
249 estimates_by_truths_profile.histograms[0].SetMaximum()
250 estimates_by_truths_profile.histograms[0].SetMinimum()
252 estimates_by_truths_profile.xlabel =
'True ' + axis_label
253 estimates_by_truths_profile.ylabel =
'Estimated ' + axis_label
255 estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title=
'Diagonal profile')
256 estimates_by_truths_profile.fit_diag()
258 self.
plots[
'diag_profile'] = estimates_by_truths_profile
262 if "residuals" in which_plots:
264 residuals_hist_name = formatter.format(plot_name, subplot_name=
"residuals")
266 residuals_hist.hist(residuals,
267 outlier_z_score=outlier_z_score,
269 residuals_hist.xlabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
270 residuals_hist.title = formatter.format(plot_title, subplot_title=
'Residual distribution')
272 self.
plots[
'residuals'] = residuals_hist
276 if variances
is not None and "sigmas" in which_plots:
279 sigmas_hist_name = formatter.format(plot_name, subplot_name=
"sigmas")
281 sigmas_hist.hist(sigmas,
283 outlier_z_score=outlier_z_score,
285 sigmas_hist.xlabel = compose_axis_label(
"#sigma (" + quantity_name +
')', self.
unit)
286 sigmas_hist.title = formatter.format(plot_title, subplot_title=
'Estimated variance distribution')
288 self.
plots[
'sigmas'] = sigmas_hist
292 if variances
is not None and "pulls" in which_plots:
295 pulls_hist_name = formatter.format(plot_name, subplot_name=
"pulls")
297 pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
298 pulls_hist.xlabel =
"pull (" + quantity_name +
")"
299 pulls_hist.title = formatter.format(plot_title, subplot_title=
'Pull distribution')
300 pulls_hist.fit_gaus(z_score=1)
302 self.
plots[
'pulls'] = pulls_hist
306 if variances
is not None and "p_values" in which_plots:
309 p_values_hist_name = formatter.format(plot_name, subplot_name=
"p-values")
311 p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
312 p_values_hist.xlabel =
"p-value (" + quantity_name +
")"
313 p_values_hist.title = formatter.format(plot_title, subplot_title=
'P-value distribution')
314 p_values_hist.fit_const()
316 self.
plots[
'p_values'] = p_values_hist
320 for aux_name, aux_values
in auxiliaries.items():
321 if "aux_residual_hist2d" in which_plots
or "aux" in which_plots:
323 aux_residuals_hist2d_name = formatter.format(plot_name,
324 subplot_name=
"residuals over {}".format(aux_name))
326 aux_residuals_hist2d.hist2d(aux_values,
328 outlier_z_score=outlier_z_score,
331 aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
332 aux_residuals_hist2d.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
333 aux_residuals_hist2d.title = formatter.format(plot_title,
334 subplot_title=
'Residual distribution over {}'.format(aux_name))
336 self.
plots[
'aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
338 if "aux_residual_profile" in which_plots
or "aux" in which_plots:
340 aux_residuals_profile_name = formatter.format(plot_name,
341 subplot_name=
"residuals profile over {}".format(aux_name))
343 aux_residuals_profile.profile(aux_values,
345 outlier_z_score=outlier_z_score,
350 aux_residuals_profile.xlabel = compose_axis_label(aux_name)
351 aux_residuals_profile.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
352 aux_residuals_profile.title = formatter.format(plot_title,
353 subplot_title=
'Residual profile over {}'.format(aux_name))
355 self.
plots[
'aux_residuals_profile_' + aux_name] = aux_residuals_profile
357 if variances
is not None and (
"aux_pull_hist2d" in which_plots
or "aux" in which_plots):
359 aux_pulls_hist2d_name = formatter.format(plot_name,
360 subplot_name=
"pulls over {}".format(aux_name))
362 aux_pulls_hist2d.hist2d(aux_values,
364 outlier_z_score=outlier_z_score,
367 aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
368 aux_pulls_hist2d.ylabel =
"pull (" + quantity_name +
")"
369 aux_pulls_hist2d.title = formatter.format(plot_title,
370 subplot_title=
'Pull scatter over {}'.format(aux_name))
372 self.
plots[
'aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
374 if variances
is not None and (
"aux_pull_profile" in which_plots
or "aux" in which_plots):
376 aux_pulls_profile_name = formatter.format(plot_name,
377 subplot_name=
"pull profile over {}".format(aux_name))
379 aux_pulls_profile.profile(aux_values,
381 outlier_z_score=outlier_z_score,
385 aux_pulls_profile.xlabel = compose_axis_label(aux_name)
386 aux_pulls_profile.ylabel =
"pull (" + quantity_name +
")"
387 aux_pulls_profile.title = formatter.format(plot_title,
388 subplot_title=
'Pull profile over {}'.format(aux_name))
390 self.
plots[
'aux_pulls_profile_' + aux_name] = aux_pulls_profile
397 """Returns the contact."""
406 contact: new contact information
409 for validation_plot
in list(self.
plots.values()):
410 validation_plot.contact = contact
413 """ Write all validation plots to the given ROOT directory
415 tDirectory - the ROOT directory where to write to
417 for validation_plot
in list(self.
plots.values()):
418 validation_plot.write(tDirectory)