23formatter = TolerateMissingKeyFormatter()
27 """Performs a comparison of an estimated quantity to their truths by generating standardized validation plots."""
29 default_outlier_z_score = 5.0
31 default_plot_name =
"{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
33 default_plot_title =
"{subplot_title} of {quantity_name}{plot_title_postfix}"
35 default_which_plots = [
44 "aux_residual_hist2d",
45 "aux_residual_profile",
51 default_is_expert =
True
64 plot_title_postfix='',
65 referenceFileName=None
67 """Performs a comparison of an estimated quantity to their truths by generating standardized validation plots."""
72 self.
unit = unit
or get_unit(quantity_name)
75 if outlier_z_score
is None:
98 self.
plots = collections.OrderedDict()
112 """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
113 Close indicates whether the figures shall be closed after they are saved.
117 truths : array_like(float)
118 Sample of the true values
119 estimates : array_like(float)
120 Corresponding estimations
121 variances : array_like(float), optional
122 Corresponding variance estimations
123 auxiliaries : Dict[name, array_like(float)], optional
124 Auxiliary variable to show distribution of residuals
and pull
as function
125 selected_plots : list(str), optional
126 List of analysis plots to be generated. All
if not given.
127 Currently valid names are
128 truths, estimates, diag_profile, diag_scatter, residuals,
129 sigmas, pulls, p_values
132 if is_expert
is None:
135 if which_plots
is None:
140 axis_label = compose_axis_label(quantity_name, self.
unit)
149 absolute_truths = truths.copy()
150 absolute_estimates = estimates.copy()
152 flip_sign_for = truths < 0
153 absolute_truths[flip_sign_for] = -truths[flip_sign_for]
154 absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
156 truths = absolute_truths
157 estimates = absolute_estimates
159 quantity_name =
'absolute ' + quantity_name
161 residuals = estimates - truths
163 if variances
is not None:
164 sigmas = np.sqrt(variances)
165 pulls = np.divide(residuals, sigmas)
166 p_values = 1.0 - erf(np.abs(pulls))
169 if plot_name
is None:
172 plot_name = formatter.format(plot_name,
173 quantity_name=quantity_name,
174 plot_name_prefix=plot_name_prefix,
178 if plot_title
is None:
181 plot_title = formatter.format(plot_title,
182 quantity_name=quantity_name,
188 if "truths" in which_plots:
190 truths_hist_name = formatter.format(plot_name, subplot_name=
"truths")
192 truths_hist.hist(truths,
193 outlier_z_score=outlier_z_score,
195 truths_hist.xlabel = axis_label
196 truths_hist.title = formatter.format(plot_title, subplot_title=
'True distribution')
198 self.
plots[
'truths'] = truths_hist
203 if "estimates" in which_plots:
205 estimates_hist_name = formatter.format(plot_name, subplot_name=
"estimates")
207 estimates_hist.hist(estimates,
208 outlier_z_score=outlier_z_score,
210 estimates_hist.xlabel = axis_label
211 estimates_hist.title = formatter.format(plot_title, subplot_title=
'Estimates distribution')
213 self.
plots[
'estimates'] = estimates_hist
217 if "diag_scatter" in which_plots:
219 estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name=
"diag_scatter")
221 estimates_by_truths_scatter.scatter(truths,
223 outlier_z_score=outlier_z_score,
225 estimates_by_truths_scatter.xlabel =
'True ' + axis_label
226 estimates_by_truths_scatter.ylabel =
'Estimated ' + axis_label
227 estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title=
'Diagonal scatter plot')
229 self.
plots[
'diag_scatter'] = estimates_by_truths_scatter
231 if "diag_profile" in which_plots:
233 estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name=
"diag_profile")
237 estimates_by_truths_profile.profile(truths,
239 outlier_z_score=outlier_z_score,
244 hist = estimates_by_truths_profile.histograms[0]
245 GetBinContent = hist.GetBinContent
246 GetBinCenter = hist.GetBinCenter
247 SetBinContent = hist.SetBinContent
248 for i_bin
in range(hist.GetNbinsX() + 2):
249 residual = GetBinContent(i_bin)
250 truth = GetBinCenter(i_bin)
252 SetBinContent(i_bin, residual + truth)
255 estimates_by_truths_profile.histograms[0].SetMaximum()
256 estimates_by_truths_profile.histograms[0].SetMinimum()
258 estimates_by_truths_profile.xlabel =
'True ' + axis_label
259 estimates_by_truths_profile.ylabel =
'Estimated ' + axis_label
261 estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title=
'Diagonal profile')
262 estimates_by_truths_profile.fit_diag()
264 self.
plots[
'diag_profile'] = estimates_by_truths_profile
268 if "residuals" in which_plots:
270 residuals_hist_name = formatter.format(plot_name, subplot_name=
"residuals")
272 residuals_hist.hist(residuals,
273 outlier_z_score=outlier_z_score,
275 residuals_hist.xlabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
276 residuals_hist.title = formatter.format(plot_title, subplot_title=
'Residual distribution')
278 self.
plots[
'residuals'] = residuals_hist
282 if variances
is not None and "sigmas" in which_plots:
285 sigmas_hist_name = formatter.format(plot_name, subplot_name=
"sigmas")
287 sigmas_hist.hist(sigmas,
289 outlier_z_score=outlier_z_score,
291 sigmas_hist.xlabel = compose_axis_label(
"#sigma (" + quantity_name +
')', self.
unit)
292 sigmas_hist.title = formatter.format(plot_title, subplot_title=
'Estimated variance distribution')
294 self.
plots[
'sigmas'] = sigmas_hist
298 if variances
is not None and "pulls" in which_plots:
301 pulls_hist_name = formatter.format(plot_name, subplot_name=
"pulls")
303 pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
304 pulls_hist.xlabel =
"pull (" + quantity_name +
")"
305 pulls_hist.title = formatter.format(plot_title, subplot_title=
'Pull distribution')
306 pulls_hist.fit_gaus(z_score=1)
308 self.
plots[
'pulls'] = pulls_hist
312 if variances
is not None and "p_values" in which_plots:
315 p_values_hist_name = formatter.format(plot_name, subplot_name=
"p-values")
317 p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
318 p_values_hist.xlabel =
"p-value (" + quantity_name +
")"
319 p_values_hist.title = formatter.format(plot_title, subplot_title=
'P-value distribution')
320 p_values_hist.fit_const()
322 self.
plots[
'p_values'] = p_values_hist
326 for aux_name, aux_values
in auxiliaries.items():
327 if "aux_residual_hist2d" in which_plots
or "aux" in which_plots:
329 aux_residuals_hist2d_name = formatter.format(plot_name,
330 subplot_name=f
"residuals over {aux_name}")
332 aux_residuals_hist2d.hist2d(aux_values,
334 outlier_z_score=outlier_z_score,
337 aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
338 aux_residuals_hist2d.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
339 aux_residuals_hist2d.title = formatter.format(plot_title,
340 subplot_title=f
'Residual distribution over {aux_name}')
342 self.
plots[
'aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
344 if "aux_residual_profile" in which_plots
or "aux" in which_plots:
346 aux_residuals_profile_name = formatter.format(plot_name,
347 subplot_name=f
"residuals profile over {aux_name}")
349 aux_residuals_profile.profile(aux_values,
351 outlier_z_score=outlier_z_score,
356 aux_residuals_profile.xlabel = compose_axis_label(aux_name)
357 aux_residuals_profile.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unit)
358 aux_residuals_profile.title = formatter.format(plot_title,
359 subplot_title=f
'Residual profile over {aux_name}')
361 self.
plots[
'aux_residuals_profile_' + aux_name] = aux_residuals_profile
363 if variances
is not None and (
"aux_pull_hist2d" in which_plots
or "aux" in which_plots):
365 aux_pulls_hist2d_name = formatter.format(plot_name,
366 subplot_name=f
"pulls over {aux_name}")
368 aux_pulls_hist2d.hist2d(aux_values,
370 outlier_z_score=outlier_z_score,
373 aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
374 aux_pulls_hist2d.ylabel =
"pull (" + quantity_name +
")"
375 aux_pulls_hist2d.title = formatter.format(plot_title,
376 subplot_title=f
'Pull scatter over {aux_name}')
378 self.
plots[
'aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
380 if variances
is not None and (
"aux_pull_profile" in which_plots
or "aux" in which_plots):
382 aux_pulls_profile_name = formatter.format(plot_name,
383 subplot_name=f
"pull profile over {aux_name}")
385 aux_pulls_profile.profile(aux_values,
387 outlier_z_score=outlier_z_score,
391 aux_pulls_profile.xlabel = compose_axis_label(aux_name)
392 aux_pulls_profile.ylabel =
"pull (" + quantity_name +
")"
393 aux_pulls_profile.title = formatter.format(plot_title,
394 subplot_title=f
'Pull profile over {aux_name}')
396 self.
plots[
'aux_pulls_profile_' + aux_name] = aux_pulls_profile
403 """ returns the contact """
412 contact: new contact information
415 for validation_plot
in list(self.
plots.values()):
416 validation_plot.contact = contact
419 """ Write all validation plots to the given Root directory
421 tDirectory - the root directory where to write to
423 for validation_plot
in list(self.
plots.values()):
424 validation_plot.write(tDirectory)
def __init__(self, quantity_name, unit=None, outlier_z_score=None, absolute=False, contact='', plot_name=None, plot_title=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None # if set binnings of plots will be read from corresponding histograms)
quantity_name
name of the quantity the analysis is performed on
contact
Forward the contact to all plots by reassigning the contact.
plot_title
title of the plot
outlier_z_score
the outlier score defines in terms of how many std deviations a data point is considered as an outlie...
plots
dictionary to store the plots
float default_outlier_z_score
default outlier z score
unit
unit the quantity is given in
def write(self, tDirectory=None)
_contact
contact information
str default_plot_title
default plot title
plot_name_postfix
postfix to be appended after the plot name
def contact(self, contact)
plot_name
name of the plot
plot_title_postfix
postfix to be appended after the title
plot_name_prefix
prefix to be prepended to the plot name
bool default_is_expert
if true the plots created here are declared as expert plots in the validation
list default_which_plots
default list of plots to be created in this analysis
str default_plot_name
default plot name
referenceFileName
name of the reference file, if set the binnings of plots will be read from the corresponding object i...
absolute
if true only the absolute value is compared