24 formatter = TolerateMissingKeyFormatter()
28 """Performs a comparison of an estimated quantity to its truths by generating standardized validation plots."""
30 default_outlier_z_score = 5.0
32 default_plot_name =
"{plot_name_prefix}_{subplot_name}{plot_name_postfix}"
34 default_plot_title =
"{subplot_title} of {quantity_name}{plot_title_postfix}"
36 default_which_plots = [
45 "aux_residual_hist2d",
46 "aux_residual_profile",
52 default_is_expert =
True
65 plot_title_postfix='',
66 referenceFileName=None
68 """Performs a comparison of an estimated quantity to its truths by generating standardized validation plots."""
73 self.
unitunit = unit
or get_unit(quantity_name)
76 if outlier_z_score
is None:
90 self.
plot_name_prefixplot_name_prefix = plot_name_prefix
or root_save_name(quantity_name)
99 self.
plotsplots = collections.OrderedDict()
113 """Compares the concrete estimate to the truth and generates plots of the estimates, residuals, pulls and p-values.
114 Close indicates if the figures shall be closed after they are saved.
118 truths : array_like(float)
119 Sample of the true values
120 estimates : array_like(float)
121 Corresponding estimations
122 variances : array_like(float), optional
123 Corresponding variance estimations
124 auxiliaries : Dict[name, array_like(float)], optional
125 Auxiliary variables over which the distributions of residuals and pulls are shown
126 which_plots : list(str), optional
127 List of analysis plots to be generated. All if not given.
128 Currently valid names are
129 truths, estimates, diag_profile, diag_scatter, residuals,
130 sigmas, pulls, p_values, aux_residual_hist2d, aux_residual_profile, aux_pull_hist2d, aux_pull_profile, aux
133 if is_expert
is None:
136 if which_plots
is None:
141 axis_label = compose_axis_label(quantity_name, self.
unitunit)
150 absolute_truths = truths.copy()
151 absolute_estimates = estimates.copy()
153 flip_sign_for = truths < 0
154 absolute_truths[flip_sign_for] = -truths[flip_sign_for]
155 absolute_estimates[flip_sign_for] = -estimates[flip_sign_for]
157 truths = absolute_truths
158 estimates = absolute_estimates
160 quantity_name =
'absolute ' + quantity_name
162 residuals = estimates - truths
164 if variances
is not None:
165 sigmas = np.sqrt(variances)
166 pulls = np.divide(residuals, sigmas)
167 p_values = 1.0 - erf(np.abs(pulls))
170 if plot_name
is None:
173 plot_name = formatter.format(plot_name,
174 quantity_name=quantity_name,
175 plot_name_prefix=plot_name_prefix,
179 if plot_title
is None:
182 plot_title = formatter.format(plot_title,
183 quantity_name=quantity_name,
189 if "truths" in which_plots:
191 truths_hist_name = formatter.format(plot_name, subplot_name=
"truths")
193 truths_hist.hist(truths,
194 outlier_z_score=outlier_z_score,
196 truths_hist.xlabel = axis_label
197 truths_hist.title = formatter.format(plot_title, subplot_title=
'True distribution')
199 self.
plotsplots[
'truths'] = truths_hist
204 if "estimates" in which_plots:
206 estimates_hist_name = formatter.format(plot_name, subplot_name=
"estimates")
208 estimates_hist.hist(estimates,
209 outlier_z_score=outlier_z_score,
211 estimates_hist.xlabel = axis_label
212 estimates_hist.title = formatter.format(plot_title, subplot_title=
'Estimates distribution')
214 self.
plotsplots[
'estimates'] = estimates_hist
218 if "diag_scatter" in which_plots:
220 estimates_by_truths_scatter_name = formatter.format(plot_name, subplot_name=
"diag_scatter")
222 estimates_by_truths_scatter.scatter(truths,
224 outlier_z_score=outlier_z_score,
226 estimates_by_truths_scatter.xlabel =
'True ' + axis_label
227 estimates_by_truths_scatter.ylabel =
'Estimated ' + axis_label
228 estimates_by_truths_scatter.title = formatter.format(plot_title, subplot_title=
'Diagonal scatter plot')
230 self.
plotsplots[
'diag_scatter'] = estimates_by_truths_scatter
232 if "diag_profile" in which_plots:
234 estimates_by_truths_profile_name = formatter.format(plot_name, subplot_name=
"diag_profile")
238 estimates_by_truths_profile.profile(truths,
240 outlier_z_score=outlier_z_score,
245 hist = estimates_by_truths_profile.histograms[0]
246 GetBinContent = hist.GetBinContent
247 GetBinCenter = hist.GetBinCenter
248 SetBinContent = hist.SetBinContent
249 for i_bin
in range(hist.GetNbinsX() + 2):
250 residual = GetBinContent(i_bin)
251 truth = GetBinCenter(i_bin)
253 SetBinContent(i_bin, residual + truth)
256 estimates_by_truths_profile.histograms[0].SetMaximum()
257 estimates_by_truths_profile.histograms[0].SetMinimum()
259 estimates_by_truths_profile.xlabel =
'True ' + axis_label
260 estimates_by_truths_profile.ylabel =
'Estimated ' + axis_label
262 estimates_by_truths_profile.title = formatter.format(plot_title, subplot_title=
'Diagonal profile')
263 estimates_by_truths_profile.fit_diag()
265 self.
plotsplots[
'diag_profile'] = estimates_by_truths_profile
269 if "residuals" in which_plots:
271 residuals_hist_name = formatter.format(plot_name, subplot_name=
"residuals")
273 residuals_hist.hist(residuals,
274 outlier_z_score=outlier_z_score,
276 residuals_hist.xlabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unitunit)
277 residuals_hist.title = formatter.format(plot_title, subplot_title=
'Residual distribution')
279 self.
plotsplots[
'residuals'] = residuals_hist
283 if variances
is not None and "sigmas" in which_plots:
286 sigmas_hist_name = formatter.format(plot_name, subplot_name=
"sigmas")
288 sigmas_hist.hist(sigmas,
290 outlier_z_score=outlier_z_score,
292 sigmas_hist.xlabel = compose_axis_label(
"#sigma (" + quantity_name +
')', self.
unitunit)
293 sigmas_hist.title = formatter.format(plot_title, subplot_title=
'Estimated variance distribution')
295 self.
plotsplots[
'sigmas'] = sigmas_hist
299 if variances
is not None and "pulls" in which_plots:
302 pulls_hist_name = formatter.format(plot_name, subplot_name=
"pulls")
304 pulls_hist.hist(pulls, outlier_z_score=outlier_z_score, is_expert=is_expert)
305 pulls_hist.xlabel =
"pull (" + quantity_name +
")"
306 pulls_hist.title = formatter.format(plot_title, subplot_title=
'Pull distribution')
307 pulls_hist.fit_gaus(z_score=1)
309 self.
plotsplots[
'pulls'] = pulls_hist
313 if variances
is not None and "p_values" in which_plots:
316 p_values_hist_name = formatter.format(plot_name, subplot_name=
"p-values")
318 p_values_hist.hist(p_values, lower_bound=0, upper_bound=1, is_expert=is_expert)
319 p_values_hist.xlabel =
"p-value (" + quantity_name +
")"
320 p_values_hist.title = formatter.format(plot_title, subplot_title=
'P-value distribution')
321 p_values_hist.fit_const()
323 self.
plotsplots[
'p_values'] = p_values_hist
327 for aux_name, aux_values
in auxiliaries.items():
328 if "aux_residual_hist2d" in which_plots
or "aux" in which_plots:
330 aux_residuals_hist2d_name = formatter.format(plot_name,
331 subplot_name=
"residuals over {}".format(aux_name))
333 aux_residuals_hist2d.hist2d(aux_values,
335 outlier_z_score=outlier_z_score,
338 aux_residuals_hist2d.xlabel = compose_axis_label(aux_name)
339 aux_residuals_hist2d.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unitunit)
340 aux_residuals_hist2d.title = formatter.format(plot_title,
341 subplot_title=
'Residual distribution over {}'.format(aux_name))
343 self.
plotsplots[
'aux_residuals_hist2d_' + aux_name] = aux_residuals_hist2d
345 if "aux_residual_profile" in which_plots
or "aux" in which_plots:
347 aux_residuals_profile_name = formatter.format(plot_name,
348 subplot_name=
"residuals profile over {}".format(aux_name))
350 aux_residuals_profile.profile(aux_values,
352 outlier_z_score=outlier_z_score,
357 aux_residuals_profile.xlabel = compose_axis_label(aux_name)
358 aux_residuals_profile.ylabel = compose_axis_label(
"#Delta " + quantity_name +
" (estimate - truth)", self.
unitunit)
359 aux_residuals_profile.title = formatter.format(plot_title,
360 subplot_title=
'Residual profile over {}'.format(aux_name))
362 self.
plotsplots[
'aux_residuals_profile_' + aux_name] = aux_residuals_profile
364 if variances
is not None and (
"aux_pull_hist2d" in which_plots
or "aux" in which_plots):
366 aux_pulls_hist2d_name = formatter.format(plot_name,
367 subplot_name=
"pulls over {}".format(aux_name))
369 aux_pulls_hist2d.hist2d(aux_values,
371 outlier_z_score=outlier_z_score,
374 aux_pulls_hist2d.xlabel = compose_axis_label(aux_name)
375 aux_pulls_hist2d.ylabel =
"pull (" + quantity_name +
")"
376 aux_pulls_hist2d.title = formatter.format(plot_title,
377 subplot_title=
'Pull scatter over {}'.format(aux_name))
379 self.
plotsplots[
'aux_pulls_hist2d_' + aux_name] = aux_pulls_hist2d
381 if variances
is not None and (
"aux_pull_profile" in which_plots
or "aux" in which_plots):
383 aux_pulls_profile_name = formatter.format(plot_name,
384 subplot_name=
"pull profile over {}".format(aux_name))
386 aux_pulls_profile.profile(aux_values,
388 outlier_z_score=outlier_z_score,
392 aux_pulls_profile.xlabel = compose_axis_label(aux_name)
393 aux_pulls_profile.ylabel =
"pull (" + quantity_name +
")"
394 aux_pulls_profile.title = formatter.format(plot_title,
395 subplot_title=
'Pull profile over {}'.format(aux_name))
397 self.
plotsplots[
'aux_pulls_profile_' + aux_name] = aux_pulls_profile
404 """ returns the contact """
413 contact: new contact information
416 for validation_plot
in list(self.
plotsplots.values()):
417 validation_plot.contact = contact
420 """ Write all validation plots to the given ROOT directory
422 tDirectory - the ROOT directory where to write to
424 for validation_plot
in list(self.
plotsplots.values()):
425 validation_plot.write(tDirectory)
def __init__(self, quantity_name, unit=None, outlier_z_score=None, absolute=False, contact='', plot_name=None, plot_title=None, plot_name_prefix='', plot_name_postfix='', plot_title_postfix='', referenceFileName=None # if set binnings of plots will be read from corresponding histograms)
quantity_name
name of the quantity the analysis is performed on
contact
Forward the contact to all plots by reassigning the contact.
plot_title
title of the plot
outlier_z_score
the outlier score defines in terms of how many standard deviations a data point is considered an outlier...
plots
dictionary to store the plots
float default_outlier_z_score
default outlier z score
unit
unit the quantity is given in
def analyse(self, truths, estimates, variances=None, auxiliaries={}, which_plots=None, is_expert=None)
def write(self, tDirectory=None)
_contact
contact information
plot_name_postfix
postfix to be appended after the plot name
def contact(self, contact)
plot_name
name of the plot
plot_title_postfix
postfix to be appended after the title
plot_name_prefix
prefix to be prepended to the plot name
string default_plot_title
default plot title
bool default_is_expert
if true the plots created here are declared as expert plots in the validation
list default_which_plots
default list of plots to be created in this analysis
referenceFileName
name of the reference file, if set the binnings of plots will be read from the corresponding object i...
string default_plot_name
default plot name
absolute
if true only the absolute value is compared