Belle II Software  release-08-01-10
plot.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import sys
13 import math
14 import collections
15 import array
16 import numpy as np
17 import ctypes
18 
19 import ROOT
20 
21 import basf2
22 
23 from tracking.root_utils import root_cd, root_save_name
24 
25 from tracking.validation import statistics
26 
27 import logging
28 
29 
30 def get_logger():
31  """Get for the logging.Logger instance of this module
32 
33  Returns
34  -------
35  logging.Logger
36  Logger instance of this module
37  """
38  return logging.getLogger(__name__)
39 
40 
41 
42 units_by_quantity_name = {
43  'x': 'cm',
44  'y': 'cm',
45  'z': 'cm',
46  'E': 'GeV',
47  'px': 'GeV',
48  'p_{x}': 'GeV',
49  'py': 'GeV',
50  'p_{y}': 'GeV',
51  'pz': 'GeV',
52  'p_{z}': 'GeV',
53  'pt': 'GeV',
54  'p_{t}': 'GeV',
55  'd0': 'cm',
56  'd_{0}': 'cm',
57  'phi0': None,
58  '#phi_{0}': None,
59  'omega': '1/cm',
60  '#omega': '1/cm',
61  'z0': 'cm',
62  'z_{0}': 'cm',
63  'tan_lambda': None,
64  'tan #lambda': None}
65 
66 
67 def get_unit(quantity_name):
68  """Infers the unit of a quantity from its name.
69 
70  Assumes the standard Belle II unit system.
71 
72  Currently looks up the quantity string from units_by_quantity_name.
73 
74  Parameters
75  ----------
76  quantity_name : str
77  Name of a quantity (E.g. pt, x, ...)
78 
79  Returns
80  -------
81  str
82  """
83 
84  unit = units_by_quantity_name.get(quantity_name, None)
85  return unit
86 
87 
88 def compose_axis_label(quantity_name, unit=None):
89  """Formats a quantity name and a unit to a label for a plot axes.
90 
91  If the unit is not given to is tried to infer it
92  from the quantity name by the get_unit function.
93 
94  Parameters
95  ----------
96  quantity_name : str
97  Name of the quantity to be displayed at the axes
98  unit : str, optional
99  The unit of the quantity. Defaults to get_unit(quantity_name)
100 
101  Returns
102  -------
103  str
104  """
105 
106  if unit is None:
107  unit = get_unit(quantity_name)
108 
109  if unit is None:
110  axis_label = quantity_name
111  else:
112  axis_label = '%s (%s)' % (quantity_name, unit)
113 
114  return axis_label
115 
116 
117 def get1DBinningFromReference(name, refFileName):
118  """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
119 
120  @param name : name of the TH1 object to be looked for in the file
121  @param refFileName : name of the reference file where the object is searched for
122 
123  @return int nbin, float xmin, float xmax of the TH1
124  """
125 
126  nbins = None
127  x_min = None
128  x_max = None
129 
130  if refFileName is None or refFileName == "":
131  return nbins, x_min, x_max
132 
133  # store current directory to not confuse directories by opening a TFile
134  oldDirectory = ROOT.gROOT.CurrentDirectory().load()
135 
136  tfile = ROOT.TFile(refFileName)
137  if tfile.IsOpen():
138  objptr = tfile.Get(name)
139  if objptr and objptr.InheritsFrom("TH1"):
140  nbins = objptr.GetNbinsX()
141  x_min = objptr.GetXaxis().GetXmin()
142  x_max = objptr.GetXaxis().GetXmax()
143  else:
144  basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
145  else:
146  basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
147 
148  tfile.Close()
149 
150  # set current directory back to original one
151  oldDirectory.cd()
152 
153  return nbins, x_min, x_max
154 
155 
156 
157 StatsEntry = ROOT.TParameter(float)
158 
159 
160 class ValidationPlot(object):
161 
162  """Class for generating a validation plot for the Belle II validation page.
163 
164  Typically it generates plots from values stored in numpy arrays and feeds them into
165  plot ROOT classes for storing them.
166 
167  It implements an automatic binning procedure based on the rice rule and
168  robust z score outlier detection.
169 
170  It also keeps track of additional statistics typically neglected by ROOT such as a count
171  for the non finit values such as NaN, +Inf, -Inf.
172 
173  The special attributes for the Belle II validation page like
174  * title
175  * contract
176  * description
177  * check
178  are exposed as properties of this class.
179  """
180 
181 
182  very_sparse_dots_line_style_index = 28
183  # This line style is defined in the set_tstyle method below.
184 
185  def __init__(self, name, referenceFileName=None):
186  """Constructor of the ValidationPlot
187 
188  Parameters
189  ----------
190  name : str
191  A unique name to be used as the name of the ROOT object to be generated
192 
193  referenceFileName : str
194  name of a reference file. If set the code will try to get the histogram or profile
195  from that file and determine the number of bins and upper and lower bound
196  (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
197  """
198 
199 
200  self.namename = root_save_name(name)
201 
202 
203  self.referenceFileNamereferenceFileName = referenceFileName
204 
205 
206  self._description_description = ''
207 
208 
209  self._check_check = ''
210 
211 
212  self._contact_contact = ''
213 
214 
215  self._xlabel_xlabel = ''
216 
217 
218  self._ylabel_ylabel = ''
219 
220 
221  self._title_title = ''
222 
223 
224  self.plotplot = None
225 
226 
227  self.histogramshistograms = []
228 
229 
230  self._is_expert_is_expert = True
231 
232 
233  self.pvalue_warnpvalue_warn = None
234 
235 
236  self.pvalue_errorpvalue_error = None
237 
238 
239  self.y_logy_log = False
240 
241  def hist(self,
242  xs,
243  weights=None,
244  stackby=None,
245  bins=None,
246  lower_bound=None,
247  upper_bound=None,
248  outlier_z_score=None,
249  include_exceptionals=True,
250  allow_discrete=False,
251  cumulation_direction=None,
252  is_expert=True):
253  """Fill the plot with a one dimensional histogram."""
254 
255  # if referenceFileName was set the binning will taken from there
256  if self.referenceFileNamereferenceFileName is not None:
257  n, xmin, xmax = get1DBinningFromReference(self.namename, self.referenceFileNamereferenceFileName)
258  if n is not None and xmin is not None and xmax is not None:
259  bins = n
260  upper_bound = xmax
261  lower_bound = xmin
262 
263  th1_factory = ROOT.TH1D
264  self._is_expert_is_expert = is_expert
265 
266  self.create_1dcreate_1d(th1_factory,
267  xs,
268  weights=weights,
269  stackby=stackby,
270  bins=bins,
271  lower_bound=lower_bound,
272  upper_bound=upper_bound,
273  outlier_z_score=outlier_z_score,
274  include_exceptionals=include_exceptionals,
275  allow_discrete=allow_discrete,
276  cumulation_direction=cumulation_direction)
277 
278  if not self.ylabelylabelylabelylabel:
279 
280  self.ylabelylabelylabelylabel = 'count'
281 
282  return self
283 
284  def profile(self,
285  xs,
286  ys,
287  weights=None,
288  stackby=None,
289  bins=None,
290  lower_bound=None,
291  upper_bound=None,
292  y_binary=None,
293  y_log=None,
294  outlier_z_score=None,
295  include_exceptionals=True,
296  allow_discrete=False,
297  cumulation_direction=None,
298  gaus_z_score=None,
299  is_expert=True,
300  is_asymmetry=False):
301  """Fill the plot with a one dimensional profile of one variable over another."""
302 
303  # if referenceFileName was set the binning will taken from there
304  if self.referenceFileNamereferenceFileName is not None:
305  n = None
306  xmin = None
307  xmax = None
308  n, xmin, xmax = get1DBinningFromReference(self.namename, self.referenceFileNamereferenceFileName)
309  if n is not None and xmin is not None and xmax is not None:
310  bins = n
311  upper_bound = xmax
312  lower_bound = xmin
313 
314  th1_factory = ROOT.TProfile
315  self._is_expert_is_expert = is_expert
316  if gaus_z_score is None:
317  self.create_1dcreate_1d(th1_factory,
318  xs,
319  ys,
320  weights=weights,
321  stackby=stackby,
322  bins=bins,
323  lower_bound=lower_bound,
324  upper_bound=upper_bound,
325  outlier_z_score=outlier_z_score,
326  include_exceptionals=include_exceptionals,
327  allow_discrete=allow_discrete,
328  cumulation_direction=cumulation_direction)
329  else:
330  # Introduce a dummy name for the temporary two dimensional histogram
331  self.namename = "_" + self.namename
332  self.hist2dhist2d(xs, ys=ys, weights=weights, stackby=stackby,
333  bins=(bins, None),
334  lower_bound=(lower_bound, None),
335  upper_bound=(upper_bound, None),
336  outlier_z_score=(outlier_z_score, outlier_z_score),
337  include_exceptionals=(include_exceptionals, True),
338  allow_discrete=(allow_discrete, False),
339  is_expert=is_expert)
340 
341  self.namename = self.namename[1:]
342  profiles = []
343  for histogram in self.histogramshistograms:
344  profile = self.gaus_slice_fitgaus_slice_fit(histogram,
345  name=histogram.GetName()[1:],
346  z_score=gaus_z_score)
347  profiles.append(profile)
348  self.histogramshistograms = profiles
349  self.plotplot = self.create_stackcreate_stack(profiles, name=self.plotplot.GetName()[1:], reverse_stack=False)
350 
351  if y_log:
352  self.y_logy_log = True
353 
354  if y_binary or self.is_binaryis_binary(ys):
355  if not self.ylabelylabelylabelylabel:
356  self.ylabelylabelylabelylabel = 'probability'
357 
358  min_y = 0
359  if(is_asymmetry):
360  min_y = -1.05
361  for histogram in self.histogramshistograms:
362  histogram.SetMinimum(min_y)
363  histogram.SetMaximum(1.05)
364 
365  self.plotplot.SetMinimum(min_y)
366  self.plotplot.SetMaximum(1.05)
367 
368  return self
369 
370  def scatter(self,
371  xs,
372  ys,
373  stackby=None,
374  lower_bound=(None, None),
375  upper_bound=(None, None),
376  outlier_z_score=(None, None),
377  include_exceptionals=(True, True),
378  max_n_data=100000,
379  is_expert=True):
380  """Fill the plot with a (unbinned) two dimensional scatter plot"""
381 
382  self._is_expert_is_expert = is_expert
383 
384  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
385  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
386  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
387  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
388 
389  x_lower_bound, x_upper_bound = self.determine_rangedetermine_range(
390  xs,
391  lower_bound=x_lower_bound,
392  upper_bound=x_upper_bound,
393  outlier_z_score=x_outlier_z_score,
394  include_exceptionals=x_include_exceptionals
395  )
396 
397  y_lower_bound, y_upper_bound = self.determine_rangedetermine_range(
398  ys,
399  lower_bound=y_lower_bound,
400  upper_bound=y_upper_bound,
401  outlier_z_score=y_outlier_z_score,
402  include_exceptionals=y_include_exceptionals
403  )
404 
405  graph = ROOT.TGraph()
406 
407  graph.SetName(self.namename)
408  graph.SetMarkerStyle(6)
409  graph.GetHistogram().SetOption("AP") # <-- this has no effect?
410 
411  # Trying to make the graph lines invisible
412  color_index = 0 # white
413  # Transperent white
414  graph.SetLineColorAlpha(color_index, 0)
415  graph.SetLineStyle(self.very_sparse_dots_line_style_indexvery_sparse_dots_line_style_index)
416 
417  # Transport the lower and upper bound as ranges of the axis
418  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
419  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
420 
421  self.createcreate(graph,
422  xs=xs,
423  ys=ys,
424  stackby=stackby,
425  reverse_stack=False)
426 
427  return self
428 
429  def grapherrors(self,
430  xs_and_err,
431  ys_and_err,
432  stackby=None,
433  lower_bound=(None, None),
434  upper_bound=(None, None),
435  outlier_z_score=(None, None),
436  include_exceptionals=(True, True),
437  max_n_data=100000,
438  is_expert=True):
439  """Fill the plot with a (unbinned) two dimensional scatter plot
440  xs_and_err and ys_and_err are tuples containing the values and the errors on these values
441  as numpy arrays
442  """
443 
444  self._is_expert_is_expert = is_expert
445 
446  xs = xs_and_err[0]
447  ys = ys_and_err[0]
448 
449  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
450  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
451  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
452  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
453 
454  x_lower_bound, x_upper_bound = self.determine_rangedetermine_range(
455  xs,
456  lower_bound=x_lower_bound,
457  upper_bound=x_upper_bound,
458  outlier_z_score=x_outlier_z_score,
459  include_exceptionals=x_include_exceptionals
460  )
461 
462  y_lower_bound, y_upper_bound = self.determine_rangedetermine_range(
463  ys,
464  lower_bound=y_lower_bound,
465  upper_bound=y_upper_bound,
466  outlier_z_score=y_outlier_z_score,
467  include_exceptionals=y_include_exceptionals
468  )
469 
470  graph = ROOT.TGraphErrors()
471 
472  graph.SetName(self.namename)
473  graph.GetHistogram().SetOption("A")
474 
475  graph.SetMarkerColor(4)
476  graph.SetMarkerStyle(21)
477 
478  # Transport the lower and upper bound as ranges of the axis
479  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
480  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
481 
482  self.createcreate(graph,
483  xs=xs,
484  ys=ys,
485  stackby=stackby,
486  reverse_stack=False)
487 
488  return self
489 
490  def hist2d(self,
491  xs,
492  ys,
493  weights=None,
494  stackby=None,
495  bins=(None, None),
496  lower_bound=(None, None),
497  upper_bound=(None, None),
498  outlier_z_score=(None, None),
499  include_exceptionals=(True, True),
500  allow_discrete=(False, False),
501  quantiles=None,
502  is_expert=True):
503  """Fill the plot with a two dimensional histogram"""
504 
505  name = self.namename
506  # Introduce a dummy name for the temporary two dimensional histogram
507  if quantiles is not None:
508  name = "_" + self.namename
509 
510  x_bins, y_bins = self.unpack_2d_paramunpack_2d_param(bins)
511  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
512  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
513  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
514  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
515  x_allow_discrete, y_allow_discrete = self.unpack_2d_paramunpack_2d_param(allow_discrete)
516 
517  if quantiles is not None:
518  y_include_exceptionals = True
519  y_allow_discrete = False
520 
521  x_bin_edges, x_bin_labels = self.determine_bin_edgesdetermine_bin_edges(xs,
522  stackbys=stackby,
523  bins=x_bins,
524  lower_bound=x_lower_bound,
525  upper_bound=x_upper_bound,
526  outlier_z_score=x_outlier_z_score,
527  include_exceptionals=x_include_exceptionals,
528  allow_discrete=x_allow_discrete)
529 
530  y_bin_edges, y_bin_labels = self.determine_bin_edgesdetermine_bin_edges(ys,
531  stackbys=stackby,
532  bins=y_bins,
533  lower_bound=y_lower_bound,
534  upper_bound=y_upper_bound,
535  outlier_z_score=y_outlier_z_score,
536  include_exceptionals=y_include_exceptionals,
537  allow_discrete=y_allow_discrete)
538 
539  n_x_bins = len(x_bin_edges) - 1
540  n_y_bins = len(y_bin_edges) - 1
541 
542  self.lower_boundlower_bound = (x_bin_edges[0], y_bin_edges[0])
543 
544  self.upper_boundupper_bound = (x_bin_edges[-1], y_bin_edges[-1])
545 
546  histogram = ROOT.TH2D(name,
547  '',
548  n_x_bins,
549  x_bin_edges,
550  n_y_bins,
551  y_bin_edges)
552 
553  if x_bin_labels:
554  get_logger().info("Scatter plot %s is discrete in x.", name)
555  x_taxis = histogram.GetXaxis()
556  for i_x_bin, x_bin_label in enumerate(x_bin_labels):
557  x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
558  self.add_stats_entryadd_stats_entry(histogram, "dx", 1)
559 
560  else:
561  x_bin_width = x_bin_edges[1] - x_bin_edges[0]
562  self.add_stats_entryadd_stats_entry(histogram, "dx", x_bin_width)
563 
564  if y_bin_labels:
565  get_logger().info("Scatter plot %s is discrete in y.", name)
566  y_taxis = histogram.GetYaxis()
567  for i_y_bin, y_bin_label in enumerate(y_bin_labels):
568  y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
569  self.add_stats_entryadd_stats_entry(histogram, "dy", 1)
570 
571  else:
572  y_bin_width = y_bin_edges[1] - y_bin_edges[0]
573  self.add_stats_entryadd_stats_entry(histogram, "dy", y_bin_width)
574 
575  self.createcreate(histogram, xs, ys=ys, weights=weights, stackby=stackby)
576 
577  if quantiles is not None:
578  self.namename = self.namename[1:]
579  profiles = []
580  for histogram in self.histogramshistograms:
581  for quantile in quantiles:
582  profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
583 
584  # Manually copy labels grumble grumble
585  x_taxis = histogram.GetXaxis()
586  new_x_taxis = profile.GetXaxis()
587  for i_bin in range(x_taxis.GetNbins() + 2):
588  label = x_taxis.GetBinLabel(i_bin)
589  if label != "":
590  new_x_taxis.SetBinLabel(i_bin, label)
591 
592  # Remove faulty error values)
593  epsilon = sys.float_info.epsilon
594  for i_bin in range(0, profile.GetNbinsX() + 2):
595  profile.SetBinError(i_bin, epsilon)
596 
597  profiles.append(profile)
598 
599  self.histogramshistograms = profiles
600  self.plotplot = self.create_stackcreate_stack(profiles, name=self.plotplot.GetName()[1:], reverse_stack=False, force_graph=True)
601 
602  # Adjust the discrete bins after the filling to be equidistant
603  if x_bin_labels:
604  for histogram in self.histogramshistograms:
605  x_taxis = histogram.GetXaxis()
606  x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
607  x_taxis.Set(n_x_bins, x_bin_edges)
608 
609  # Adjust the discrete bins after the filling to be equidistant
610  if y_bin_labels:
611  for histogram in self.histogramshistograms:
612  x_taxis = histogram.GetXaxis()
613  y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
614  y_taxis.Set(n_y_bins, y_bin_edges)
615 
616  return self
617 
618  def fit_gaus(self, z_score=None):
619  """Fit a gaus belle curve to the central portion of a one dimensional histogram
620 
621  The fit is applied to the central mean +- z_score * std interval of the histogram,
622  such that it is less influence by non gaussian tails further away than the given z score.
623 
624  @param float z_score number of sigmas to include from the mean value of the histogram.
625  """
626  title = "gaus"
627  formula = "gaus"
628 
629  plot = self.plotplot
630  if plot is None:
631  raise RuntimeError('Validation plot must be filled before it can be fitted.')
632 
633  if not isinstance(plot, ROOT.TH1D):
634  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
635 
636  histogram = plot
637 
638  fit_tf1 = ROOT.TF1("Fit", formula)
639  fit_tf1.SetTitle(title)
640  fit_tf1.SetParName(0, "n")
641  fit_tf1.SetParName(1, "mean")
642  fit_tf1.SetParName(2, "std")
643 
644  n = histogram.GetSumOfWeights()
645  mean = histogram.GetMean()
646  std = histogram.GetStdDev()
647 
648  fit_tf1.SetParameter(0, n)
649  fit_tf1.SetParameter(1, mean)
650  fit_tf1.SetParameter(2, std)
651 
652  fit_options = "LM"
653  return self.fitfit(fit_tf1,
654  fit_options,
655  z_score=z_score)
656 
657  def fit_line(self):
658  """Fit a general line to a one dimensional histogram"""
659  title = "line"
660  formula = "x++1"
661  fit_tf1 = ROOT.TF1("Fit", formula)
662  fit_tf1.SetTitle(title)
663  fit_tf1.SetParName(0, "slope")
664  fit_tf1.SetParName(1, "intercept")
665  self.fitfit(fit_tf1, 'M')
666 
667  def fit_const(self):
668  """Fit a constant function to a one dimensional histogram"""
669  title = "const"
670  formula = "[0]"
671  fit_tf1 = ROOT.TF1("Fit", formula)
672  fit_tf1.SetTitle(title)
673  fit_tf1.SetParName(0, "intercept")
674  self.fitfit(fit_tf1, 'M')
675 
676  def fit_diag(self):
677  """Fit a diagonal line through the origin to a one dimensional histogram"""
678  title = "diag"
679  formula = "[0]*x"
680  fit_tf1 = ROOT.TF1("Fit", formula)
681  fit_tf1.SetTitle(title)
682  fit_tf1.SetParName(0, "slope")
683  self.fitfit(fit_tf1, 'M')
684 
685  def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
686  """Fit a user defined function to a one dimensional histogram
687 
688  Parameters
689  ----------
690  formula : str or TF1
691  Formula string or TH1 to be fitted. See TF1 constructurs for that is a valid formula
692  options : str
693  Options string to be used in the fit. See TH1::Fit()
694  lower_bound : float
695  Lower bound of the range to be fitted
696  upper_bound : float
697  Upper bound of the range to be fitted
698  """
699  plot = self.plotplot
700  if plot is None:
701  raise RuntimeError('Validation plot must be filled before it can be fitted.')
702 
703  if not isinstance(plot, ROOT.TH1D):
704  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
705 
706  histogram = plot
707 
708  xaxis = histogram.GetXaxis()
709  n_bins = xaxis.GetNbins()
710  hist_lower_bound = xaxis.GetBinLowEdge(1)
711  hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
712 
713  if z_score is not None:
714  mean = histogram.GetMean()
715  std = histogram.GetStdDev()
716 
717  if lower_bound is None:
718  lower_bound = mean - z_score * std
719 
720  if upper_bound is None:
721  upper_bound = mean + z_score * std
722 
723  # Setup the plotting range of the function to match the histogram
724  if isinstance(formula, ROOT.TF1):
725  fit_tf1 = formula
726  fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
727  else:
728  fit_tf1 = ROOT.TF1("Fit",
729  formula,
730  hist_lower_bound,
731  hist_upper_bound)
732  get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
733 
734  # Determine fit range
735  if lower_bound is None or lower_bound < hist_lower_bound:
736  lower_bound = hist_lower_bound
737  if upper_bound is None or upper_bound > hist_upper_bound:
738  upper_bound = hist_upper_bound
739 
740  # Make sure the fitted function is not automatically added since we want to do that one our own.
741  # Look for the documentation of TH1::Fit() for details of the options.
742  if 'N' not in options:
743  options += 'N'
744 
745  fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
746 
747  self.set_fit_tf1set_fit_tf1(histogram, fit_tf1)
748  return fit_res
749 
750  def show(self):
751  """Show the plot."""
752  if self.plotplot:
753  self.plotplot.Draw()
754  else:
755  raise ValueError("Can not show a validation plot that has not been filled.")
756 
757  def write(self, tdirectory=None):
758  """Write the plot to file
759 
760  Parameters
761  ----------
762  tdirectory : ROOT.TDirectory, optional
763  ROOT directory to which the plot should be written.
764  If omitted write to the current directory
765  """
766  if not self.plotplot:
767  raise ValueError("Can not write a validation plot that has not been filled.")
768 
769  with root_cd(tdirectory):
770  ValidationPlot.set_tstyle()
771  if self.plotplot not in self.histogramshistograms:
772  self.plotplot.Write()
773 
774  # always disable ROOT's stat plot because it hides items
775  meta_options = ["nostats"]
776 
777  # add expert option, if requested
778  if self.is_expertis_expert:
779  meta_options.append("expert")
780  else:
781  meta_options.append("shifter")
782 
783  # give a custom pvalue warning / error zone if requested
784  if self.pvalue_errorpvalue_error is not None:
785  meta_options.append("pvalue-error={}".format(self.pvalue_errorpvalue_error))
786  if self.pvalue_warnpvalue_warn is not None:
787  meta_options.append("pvalue-warn={}".format(self.pvalue_warnpvalue_warn))
788 
789  # Indicator if the y axes should be displayed as a logarithmic scale
790  if self.y_logy_log:
791  meta_options.append("logy")
792 
793  meta_options_str = ",".join(meta_options)
794 
795  for histogram in self.histogramshistograms:
796  histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
797  histogram.Write()
798 
799  @property
800  def is_expert(self):
801  """Getter method if an plot plot is marked as expert plot"""
802  return self._is_expert_is_expert
803 
804  @property
805  def title(self):
806  """Getter for the plot title"""
807  return self._title_title
808 
809  @title.setter
810  def title(self, title):
811  """Setter for the plot title"""
812  self._title_title = title
813  if self.plotplot:
814  self.plotplot.SetTitle(title)
815  for histogram in self.histogramshistograms:
816  histogram.SetTitle(title)
817 
818  @property
819  def xlabel(self):
820  """Getter for the axis label at the x axis"""
821  return self._xlabel_xlabel
822 
823  @xlabel.setter
824  def xlabel(self, xlabel):
825  """Setter for the axis label at the x axis"""
826  self._xlabel_xlabel = xlabel
827  for histogram in self.histogramshistograms:
828  histogram.GetXaxis().SetTitle(xlabel)
829 
830  @property
831  def ylabel(self):
832  """Getter for the axis label at the y axis"""
833  return self._ylabel_ylabel
834 
835  @ylabel.setter
836  def ylabel(self, ylabel):
837  """Setter for the axis label at the y axis"""
838  self._ylabel_ylabel = ylabel
839  for histogram in self.histogramshistograms:
840  histogram.GetYaxis().SetTitle(ylabel)
841 
842  @property
843  def contact(self):
844  """Getter for the contact email address to be displayed on the validation page"""
845  return self._contact_contact
846 
847  @contact.setter
848  def contact(self, contact):
849  """Setter for the contact email address to be displayed on the validation page"""
850  self._contact_contact = contact
851  for histogram in self.histogramshistograms:
852  found_obj = histogram.FindObject('Contact')
853  if not found_obj:
854  tnamed = ROOT.TNamed("Contact", contact)
855  histogram.GetListOfFunctions().Add(tnamed)
856  found_obj = histogram.FindObject('Contact')
857  found_obj.SetTitle(contact)
858 
859  @property
860  def description(self):
861  """Getter for the description to be displayed on the validation page"""
862  return self._description_description
863 
864  @description.setter
865  def description(self, description):
866  """Setter for the description to be displayed on the validation page"""
867  self._description_description = description
868  for histogram in self.histogramshistograms:
869  found_obj = histogram.FindObject('Description')
870  if not found_obj:
871  tnamed = ROOT.TNamed("Description", description)
872  histogram.GetListOfFunctions().Add(tnamed)
873  found_obj = histogram.FindObject('Description')
874  found_obj.SetTitle(description)
875 
876  @property
877  def check(self):
878  """Getter for the check to be displayed on the validation page"""
879  return self._check_check
880 
881  @check.setter
882  def check(self, check):
883  """Setter for the check to be displayed on the validation page"""
884  self._check_check = check
885  for histogram in self.histogramshistograms:
886  found_obj = histogram.FindObject('Check')
887  if not found_obj:
888  tnamed = ROOT.TNamed("Check", check)
889  histogram.GetListOfFunctions().Add(tnamed)
890  found_obj = histogram.FindObject('Check')
891  found_obj.SetTitle(check)
892 
893  # Implementation details #
894  # ###################### #
895 
896  @staticmethod
897  def unpack_2d_param(param):
898  """Unpacks a function parameter for the two dimensional plots.
899 
900  If it is a pair the first parameter shall apply to the x coordinate
901  the second to the y coordinate. In this case the pair is returned as two values
902 
903  If something else is given the it is assumed that this parameter should equally apply
904  to both coordinates. In this case the same values is return twice as a pair.
905 
906  Parameters
907  ----------
908  param : pair or single value
909  Function parameter for a two dimensional plot
910 
911  Returns
912  -------
913  pair
914  A pair of values being the parameter for the x coordinate and
915  the y coordinate respectively
916  """
917  try:
918  if len(param) == 2:
919  x_param, y_param = param
920  except TypeError:
921  x_param = param
922  y_param = param
923  return x_param, y_param
924 
925  @staticmethod
926  def is_binary(xs):
927  """Determine if the data consists of boolean values"""
928  return statistics.is_binary_series(xs)
929 
930  @staticmethod
931  def is_discrete(xs, max_n_unique=None):
932  """Determine if the data consists of discrete values"""
933  return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
934 
935  @staticmethod
937  """Find exceptionally frequent values
938 
939  Parameters
940  ----------
941  xs : np.array (1d)
942  Data series
943 
944  Returns
945  -------
946  np.array (1d)
947  A list of the found exceptional values.
948  """
949  return statistics.rice_exceptional_values(xs)
950 
951  @staticmethod
953  """Does an estimation of mean and standard deviation robust against outliers.
954 
955  Parameters
956  ----------
957  xs : np.array (1d)
958  Data series
959 
960  Returns
961  -------
962  float, float
963  Pair of mean and standard deviation
964  """
965  x_mean = statistics.truncated_mean(xs)
966  x_std = statistics.trimmed_std(xs)
967  return x_mean, x_std
968 
969  @staticmethod
970  def format_bin_label(value):
971  """Formats a value to be placed at a tick on an axis."""
972  if np.isfinite(value) and value == np.round(value):
973  return str(int(value))
974  else:
975  formated_value = "{:.5g}".format(value)
976 
977  # if the label is to long, switch to shorter "e" format
978  if len(formated_value) > 8:
979  formated_value = "{:.3e}".format(value)
980  return formated_value
981 
982  def create_1d(self,
983  th1_factory,
984  xs,
985  ys=None,
986  weights=None,
987  bins=None,
988  stackby=None,
989  lower_bound=None,
990  upper_bound=None,
991  outlier_z_score=None,
992  include_exceptionals=True,
993  allow_discrete=False,
994  cumulation_direction=None):
995  """Combined factory method for creating a one dimensional histogram or a profile plot."""
996  name = self.namename
997 
998  # Coerce values to a numpy array. Do not copy if already a numpy array.
999  xs = np.array(xs, copy=False)
1000 
1001  if ys is not None:
1002  ys = np.array(ys, copy=False)
1003 
1004  if weights is not None:
1005  weights = np.array(weights, copy=False)
1006 
1007  bin_edges, bin_labels = self.determine_bin_edgesdetermine_bin_edges(xs,
1008  stackbys=stackby,
1009  bins=bins,
1010  lower_bound=lower_bound,
1011  upper_bound=upper_bound,
1012  outlier_z_score=outlier_z_score,
1013  include_exceptionals=include_exceptionals,
1014  allow_discrete=allow_discrete)
1015 
1016  n_bins = len(bin_edges) - 1
1017  self.lower_boundlower_bound = bin_edges[0]
1018  self.upper_boundupper_bound = bin_edges[-1]
1019  histogram = th1_factory(name, '', n_bins, bin_edges)
1020 
1021  if bin_labels:
1022  get_logger().info("One dimensional plot %s is discrete in x.", name)
1023  x_taxis = histogram.GetXaxis()
1024  for i_bin, bin_label in enumerate(bin_labels):
1025  x_taxis.SetBinLabel(i_bin + 1, bin_label)
1026  self.add_stats_entryadd_stats_entry(histogram, "dx", 1)
1027 
1028  else:
1029  bin_width = bin_edges[1] - bin_edges[0]
1030  self.add_stats_entryadd_stats_entry(histogram, "dx", bin_width)
1031 
1032  self.createcreate(histogram,
1033  xs,
1034  ys=ys,
1035  weights=weights,
1036  stackby=stackby,
1037  cumulation_direction=cumulation_direction,
1038  reverse_stack=True)
1039  # Reverse the stack to have the signal distribution at the bottom
1040 
1041  # Adjust the discrete bins after the filling to be equidistant
1042  if bin_labels:
1043  for histogram in self.histogramshistograms:
1044  x_taxis = histogram.GetXaxis()
1045  bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1046  x_taxis.Set(n_bins, bin_edges)
1047 
1048  def create(self,
1049  histogram_template,
1050  xs,
1051  ys=None,
1052  weights=None,
1053  stackby=None,
1054  cumulation_direction=None,
1055  reverse_stack=None):
1056  """Create histograms from a template, possibly stacked"""
1057 
1058  histograms = []
1059 
1060  if stackby is None:
1061  histogram = histogram_template
1062  self.fill_intofill_into(histogram, xs, ys, weights=weights)
1063  if cumulation_direction is not None:
1064  histogram = self.cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1065 
1066  histograms.append(histogram)
1067  plot = histogram
1068 
1069  else:
1070  stackby = np.array(stackby, copy=False)
1071  name = histogram_template.GetName()
1072 
1073  histograms = self.fill_into_groupedfill_into_grouped(histogram_template,
1074  xs,
1075  ys=ys,
1076  weights=weights,
1077  groupbys=stackby,
1078  groupby_label="stack")
1079 
1080  if cumulation_direction is not None:
1081  histograms = [self.cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1082  for histogram in histograms]
1083 
1084  plot = self.create_stackcreate_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1085 
1086  self.histogramshistograms = histograms
1087  self.plotplot = plot
1088  self.attach_attributesattach_attributes()
1089 
1090  @classmethod
1091  def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1092  """Create a stack of histograms"""
1093  if len(histograms) == 1:
1094  plot = histograms[0]
1095  else:
1096  if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1097  plot = ROOT.TMultiGraph()
1098  else:
1099  plot = ROOT.THStack()
1100 
1101  plot.SetName(name)
1102 
1103  # Add the histogram in reverse order such
1104  # that the signal usually is on the bottom an well visible
1105  if reverse_stack:
1106  for histogram in reversed(histograms):
1107  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1108  histogram = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1109  plot.Add(histogram, "APZ")
1110  else:
1111  plot.Add(histogram)
1112  else:
1113  for histogram in histograms:
1114  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1115  histogram = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1116  plot.Add(histogram, "APZ")
1117  else:
1118  plot.Add(histogram)
1119 
1120  return plot
1121 
1122  @classmethod
1123  def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1124  """Extract errors from a TProfile histogram and create a TGraph from these"""
1125  if isinstance(tprofile, ROOT.TGraph):
1126  return tprofile
1127 
1128  x_taxis = tprofile.GetXaxis()
1129  n_bins = x_taxis.GetNbins()
1130 
1131  # bin_ids_with_underflow = list(range(n_bins + 1))
1132  bin_ids_without_underflow = list(range(1, n_bins + 1))
1133 
1134  bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1135  if abs_x:
1136  bin_centers = np.abs(bin_centers)
1137  bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1138  bin_x_errors = bin_widths / 2.0
1139 
1140  # Now for the histogram content not including the underflow
1141  bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1142  bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1143 
1144  tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1145 
1146  tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1147 
1148  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1149  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1150 
1151  # Copy all functions and stats entries
1152  for tobject in tprofile.GetListOfFunctions():
1153  tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1154 
1155  # Add stats entries that are displayed for profile plots
1156  cls.add_stats_entryadd_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1157 
1158  stats_values = np.array([np.nan] * 6)
1159  tprofile.GetStats(stats_values)
1160 
1161  sum_w = stats_values[0]
1162  # sum_w2 = stats_values[1]
1163  sum_wx = stats_values[2]
1164  sum_wx2 = stats_values[3]
1165  sum_wy = stats_values[4]
1166  sum_wy2 = stats_values[5]
1167 
1168  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1169  "x avg",
1170  sum_wx / sum_w)
1171 
1172  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1173  "x std",
1174  np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1175 
1176  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1177  "y avg",
1178  sum_wy / sum_w)
1179 
1180  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1181  "y std",
1182  np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1183 
1184  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1185  'cov',
1186  tgrapherrors.GetCovariance())
1187 
1188  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1189  'corr',
1190  tgrapherrors.GetCorrelationFactor())
1191 
1192  return tgrapherrors
1193 
1195  histogram_template,
1196  xs,
1197  ys=None,
1198  weights=None,
1199  groupbys=None,
1200  groupby_label="group"):
1201  """Fill data into similar histograms in groups indicated by a groupby array"""
1202 
1203  histograms = []
1204  unique_groupbys = np.unique(groupbys)
1205  name = histogram_template.GetName()
1206 
1207  for i_value, value in enumerate(unique_groupbys):
1208  if np.isnan(value):
1209  indices_for_value = np.isnan(groupbys)
1210  else:
1211  indices_for_value = groupbys == value
1212 
1213  # Make a copy of the empty histogram
1214  histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1215  i_root_color = i_value + 1
1216 
1217  self.set_colorset_color(histogram_for_value, i_root_color)
1218 
1219  if groupby_label:
1220  self.add_stats_entryadd_stats_entry(histogram_for_value, groupby_label, value)
1221 
1222  self.fill_intofill_into(histogram_for_value,
1223  xs,
1224  ys=ys,
1225  weights=weights,
1226  filter=indices_for_value)
1227 
1228  histograms.append(histogram_for_value)
1229 
1230  return histograms
1231 
1232  def set_color(self, tobject, root_i_color):
1233  """Set the color of the ROOT object.
1234 
1235  By default the line color of a TGraph should be invisible, so do not change it
1236  For other objects set the marker and the line color
1237 
1238  Parameters
1239  ----------
1240  tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1241  Object of which the color should be set.
1242  root_i_color : int
1243  Color index of the ROOT color table
1244  """
1245  if isinstance(tobject, ROOT.TGraph):
1246  tobject.SetMarkerColor(root_i_color)
1247  else:
1248  tobject.SetLineColor(root_i_color)
1249  tobject.SetMarkerColor(root_i_color)
1250 
1251  def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1252  """Fill the data into the plot object"""
1253  if isinstance(plot, ROOT.TGraph):
1254  if ys is None:
1255  raise ValueError("ys are required for filling a graph")
1256  self.fill_into_tgraphfill_into_tgraph(plot, xs, ys, filter=filter)
1257  elif isinstance(plot, ROOT.TGraphErrors):
1258  if ys is None:
1259  raise ValueError("ys are required for filling a graph error")
1260 
1261  self.fill_into_tgrapherrorfill_into_tgrapherror(plot, xs, ys)
1262  else:
1263  self.fill_into_th1fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1264 
1265  def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1266  """fill point values and error of the x and y axis into the graph"""
1267 
1268  assert(len(xs[0]) == len(ys[0]))
1269  # set the overall amount of points
1270  graph.Set(len(xs[0]))
1271 
1272  for i in range(len(xs[0])):
1273  graph.SetPoint(i, xs[0][i], ys[0][i])
1274  graph.SetPointError(i, xs[1][i], ys[1][i])
1275 
1276  def fill_into_tgraph(self, graph, xs, ys, filter=None):
1277  """Fill the data into a TGraph"""
1278 
1279  # Save some ifs by introducing a dummy slicing as a non filter
1280  if filter is None:
1281  filter = slice(None)
1282 
1283  xs = xs[filter]
1284  ys = ys[filter]
1285 
1286  max_n_data = 100000
1287  x_n_data = len(xs)
1288  y_n_data = len(ys)
1289 
1290  if max_n_data:
1291  if x_n_data > max_n_data or y_n_data > max_n_data:
1292  get_logger().warning("Number of points in scatter graph %s exceed limit %s" %
1293  (self.namename, max_n_data))
1294 
1295  get_logger().warning("Cropping %s" % max_n_data)
1296 
1297  xs = xs[0:max_n_data]
1298  ys = ys[0:max_n_data]
1299 
1300  x_axis = graph.GetXaxis()
1301  y_axis = graph.GetYaxis()
1302 
1303  x_lower_bound = x_axis.GetXmin()
1304  x_upper_bound = x_axis.GetXmax()
1305 
1306  y_lower_bound = y_axis.GetXmin()
1307  y_upper_bound = y_axis.GetXmax()
1308 
1309  x_underflow_indices = xs < x_lower_bound
1310  x_overflow_indices = xs > x_upper_bound
1311 
1312  y_underflow_indices = ys < y_lower_bound
1313  y_overflow_indices = ys > y_upper_bound
1314 
1315  plot_indices = ~(np.isnan(xs) |
1316  x_underflow_indices |
1317  x_overflow_indices |
1318  np.isnan(ys) |
1319  y_underflow_indices |
1320  y_overflow_indices)
1321 
1322  n_plot_data = np.sum(plot_indices)
1323  plot_xs = xs[plot_indices]
1324  plot_ys = ys[plot_indices]
1325 
1326  graph.Set(int(n_plot_data))
1327  for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1328  graph.SetPoint(i, x, y)
1329 
1330  self.add_stats_entryadd_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1331 
1332  self.add_nan_inf_statsadd_nan_inf_stats(graph, 'x', xs)
1333  self.add_nan_inf_statsadd_nan_inf_stats(graph, 'y', ys)
1334 
1335  x_n_underflow = np.sum(x_underflow_indices)
1336  if x_n_underflow:
1337  self.add_stats_entryadd_stats_entry(graph, 'x underf.', x_n_underflow)
1338 
1339  x_n_overflow = np.sum(x_overflow_indices)
1340  if x_n_overflow:
1341  self.add_stats_entryadd_stats_entry(graph, 'x overf.', x_n_overflow)
1342 
1343  y_n_underflow = np.sum(y_underflow_indices)
1344  if y_n_underflow:
1345  self.add_stats_entryadd_stats_entry(graph, 'y underf.', y_n_underflow)
1346 
1347  y_n_overflow = np.sum(y_overflow_indices)
1348  if y_n_overflow:
1349  self.add_stats_entryadd_stats_entry(graph, 'y overf.', y_n_overflow)
1350 
1351  self.add_stats_entryadd_stats_entry(graph, 'x avg', graph.GetMean(1))
1352  self.add_stats_entryadd_stats_entry(graph, 'x std', graph.GetRMS(1))
1353 
1354  self.add_stats_entryadd_stats_entry(graph, 'y avg', graph.GetMean(2))
1355  self.add_stats_entryadd_stats_entry(graph, 'y std', graph.GetRMS(2))
1356 
1357  self.add_stats_entryadd_stats_entry(graph, 'cov', graph.GetCovariance())
1358  self.add_stats_entryadd_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1359 
1360  def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1361  """Fill the histogram blocking non finite values
1362 
1363  Parameters
1364  ----------
1365  histogram : ROOT.TH1
1366  The histogram to be filled
1367  xs : numpy.ndarray (1d)
1368  Data for the first axes
1369  ys : numpy.ndarray (1d), optional
1370  Data for the second axes
1371  weights : numpy.ndarray (1d), optional
1372  Weight of the individual points. Defaults to one for each
1373  filter : numpy.ndarray, optional
1374  Boolean index array indicating which entries shall be taken.
1375  """
1376  # Save some ifs by introducing a dummy slicing as a non filter
1377  if filter is None:
1378  filter = slice(None)
1379 
1380  xs = xs[filter]
1381  # Count the nan and inf values in x
1382  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'x', xs)
1383  finite_filter = np.isfinite(xs)
1384 
1385  if ys is not None:
1386  ys = ys[filter]
1387  # Count the nan and inf values in y
1388  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'y', ys)
1389  finite_filter &= np.isfinite(ys)
1390 
1391  if weights is None:
1392  xs = xs[finite_filter]
1393  weights = np.ones_like(xs)
1394  else:
1395  weights = weights[filter]
1396  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'w', weights)
1397  finite_filter &= np.isfinite(weights)
1398  xs = xs[finite_filter]
1399  weights[finite_filter]
1400 
1401  if ys is not None:
1402  ys = ys[finite_filter]
1403 
1404  # Now fill the actual histogram
1405  try:
1406  histogram.FillN
1407  except AttributeError:
1408  Fill = histogram.Fill
1409  if ys is None:
1410  fill = np.frompyfunc(Fill, 2, 1)
1411  fill(xs.astype(np.float64, copy=False),
1412  weights.astype(np.float64, copy=False))
1413  else:
1414  fill = np.frompyfunc(Fill, 3, 1)
1415  fill(xs.astype(np.float64, copy=False),
1416  ys.astype(np.float64, copy=False),
1417  weights.astype(np.float64, copy=False))
1418  else:
1419  if ys is None:
1420  # Make the array types compatible with the ROOT interface if necessary
1421  xs = xs.astype(np.float64, copy=False)
1422  weights = weights.astype(np.float64, copy=False)
1423  n = len(xs)
1424  if n != 0:
1425  histogram.FillN(n, xs, weights)
1426  else:
1427  basf2.B2WARNING("No values to be filled into histogram: " + self.namename)
1428 
1429  else:
1430  # Make the array types compatible with the ROOT interface if necessary
1431  xs = xs.astype(np.float64, copy=False)
1432  ys = ys.astype(np.float64, copy=False)
1433  weights = weights.astype(np.float64, copy=False)
1434  n = len(xs)
1435  if n != 0:
1436  histogram.FillN(n, xs, ys, weights)
1437  else:
1438  basf2.B2WARNING("No values to be filled into histogram: " + self.namename)
1439 
1440  self.set_additional_stats_tf1set_additional_stats_tf1(histogram)
1441 
1442  @classmethod
1443  def add_nan_inf_stats(cls, histogram, name, xs):
1444  """ Extracts the counts of non finite floats from a series
1445  and adds them as additional statistics to the histogram.
1446 
1447  Parameters
1448  ----------
1449  histogram : derived from ROOT.TH1 or ROOT.TGraph
1450  Something having a GetListOfFunctions method that
1451  name : str
1452  A label for the data series to be prefixed to the entries.
1453  xs : numpy.ndarray (1d)
1454  Data from which the non finit floats should be counted.
1455  """
1456  n_nans = np.isnan(xs).sum()
1457  if n_nans > 0:
1458  cls.add_stats_entryadd_stats_entry(histogram, name + ' nan', n_nans)
1459 
1460  n_positive_inf = np.sum(xs == np.inf)
1461  if n_positive_inf > 0:
1462  cls.add_stats_entryadd_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1463 
1464  n_negative_inf = np.sum(xs == -np.inf)
1465  if n_negative_inf > 0:
1466  cls.add_stats_entryadd_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1467 
1468  @classmethod
1469  def add_stats_entry(cls, histogram, label, value):
1470  """Add a new additional statistics to the histogram.
1471 
1472  Parameters
1473  ----------
1474  histogram : derived from ROOT.TH1 or ROOT.TGraph
1475  Something having a GetListOfFunctions method that holds the additional statistics
1476  label : str
1477  Label of the statistic
1478  value : float
1479  Value of the statistic
1480  """
1481  stats_entry = StatsEntry(str(label), float(value))
1482  histogram.GetListOfFunctions().Add(stats_entry)
1483  cls.set_additional_stats_tf1set_additional_stats_tf1(histogram)
1484 
1485  @classmethod
1486  def get_additional_stats(cls, histogram):
1487  """Get the additional statistics from the histogram and return them a dict.
1488 
1489  Parameters
1490  ----------
1491  histogram : derived from ROOT.TH1 or ROOT.TGraph
1492  Something having a GetListOfFunctions method that holds the additional statistics
1493 
1494  Returns
1495  -------
1496  collection.OrderedDict
1497  A map of labels to values for the additional statistics
1498  """
1499  additional_stats = collections.OrderedDict()
1500  for tobject in histogram.GetListOfFunctions():
1501  if isinstance(tobject, StatsEntry):
1502  stats_entry = tobject
1503  label = stats_entry.GetName()
1504  value = stats_entry.GetVal()
1505  additional_stats[label] = value
1506  return additional_stats
1507 
1508  @classmethod
1509  def gaus_slice_fit(cls, th2, name, z_score=None):
1510  """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1511  # profile = th2.ProfileX(name)
1512 
1513  y_taxis = th2.GetYaxis()
1514  th2_lower_bound = y_taxis.GetXmin()
1515  th2_upper_bound = y_taxis.GetXmax()
1516  th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1517  n_y_bins = y_taxis.GetNbins()
1518  if z_score:
1519  y_mean = th2.GetMean(2)
1520  y_std = th2.GetStdDev(2)
1521  fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1522  fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1523  fit_height = fit_upper_bound - fit_lower_bound
1524  # Require all covered bins to be filled
1525  required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1526  else:
1527  fit_lower_bound = th2_lower_bound
1528  fit_upper_bound = th2_upper_bound
1529  fit_height = fit_upper_bound - fit_lower_bound
1530  required_n_bins_inslice_filled = n_y_bins / 1.61
1531 
1532  # Highest required number of bins should be a third
1533  required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1534 
1535  fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1536  fit_tf1.SetParName(0, "n")
1537  fit_tf1.SetParName(1, "mean")
1538  fit_tf1.SetParName(2, "std")
1539  i_first_bin = 0
1540  i_last_bin = -1
1541  fit_options = "QNR"
1542  param_fit_th1s = ROOT.TObjArray()
1543  th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1544  int(required_n_bins_inslice_filled),
1545  fit_options, param_fit_th1s)
1546 
1547  th1_means = param_fit_th1s.At(1)
1548  th1_means.SetName(name)
1549  th1_means.SetTitle(th2.GetTitle())
1550  for label, value in cls.get_additional_statsget_additional_stats(th2).items():
1551  cls.add_stats_entryadd_stats_entry(th1_means, label, value)
1552 
1553  # Manually copy labels grumble grumble
1554  x_taxis = th2.GetXaxis()
1555  new_x_taxis = th1_means.GetXaxis()
1556  for i_bin in range(x_taxis.GetNbins() + 2):
1557  label = x_taxis.GetBinLabel(i_bin)
1558  if label != "":
1559  new_x_taxis.SetBinLabel(i_bin, label)
1560 
1561  # Adjust plot bound to reflect the fit range.
1562  data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1563  data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1564  data_height = data_upper_bound - data_lower_bound
1565 
1566  plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1567  plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1568 
1569  th1_means.SetMinimum(plot_lower_bound)
1570  th1_means.SetMaximum(plot_upper_bound)
1571 
1572  return th1_means
1573 
1574  @classmethod
1575  def cumulate(cls, histogram, cumulation_direction=None):
1576  """Cumulates the histogram inplace.
1577 
1578  Parameters
1579  ----------
1580  histogram : ROOT.TH1 or ROOT.TProfile
1581  Filled histogram to be cumulated
1582  cumulation_direction : int, optional
1583  Direction is indicated by the sign.
1584  Positive means from left to right, negative means from right to left.
1585  If now cumulation direction is given return the histogram as is.
1586 
1587  Returns
1588  -------
1589  ROOT.TH1
1590  Cumulated histogram potentially altered inplace.
1591  """
1592  if not cumulation_direction:
1593  return histogram
1594 
1595  cumulate_backward = cumulation_direction < 0
1596  cumulate_forward = not cumulate_backward
1597 
1598  if isinstance(histogram, ROOT.TH2):
1599  raise ValueError("Cannot cumulate a two dimensional histogram.")
1600 
1601  if isinstance(histogram, ROOT.TH3):
1602  raise ValueError("Cannot cumulate a three dimensional histogram.")
1603 
1604  if not isinstance(histogram, ROOT.TH1):
1605  raise ValueError("Can only cumulate a one dimensional histogram.")
1606 
1607  if isinstance(histogram, ROOT.TProfile):
1608  tprofile = histogram
1609  # Turn the profile histogram into graph where we can set the new content and errors
1610  tgraph = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1611  tgraph.SetName(tprofile.GetName())
1612 
1613  n_bins = histogram.GetNbinsX()
1614  n_points = n_bins
1615  cumulated_content = 0.0
1616  cumulated_entries = 0
1617  cumulated_std = 0.0
1618 
1619  # Always include the overflow bins.
1620  i_bins = list(range(0, n_bins + 2))
1621  if not cumulate_forward:
1622  i_bins = reversed(i_bins)
1623 
1624  for i_bin in i_bins:
1625  i_point = i_bin - 1
1626  bin_content = tprofile.GetBinContent(i_bin)
1627  bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1628  bin_std = tprofile.GetBinError(i_bin)
1629 
1630  if bin_entries != 0:
1631  cumulated_content = (
1632  1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1633  (cumulated_entries + bin_entries)
1634  )
1635 
1636  cumulated_std = (
1637  math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1638  (cumulated_entries + bin_entries)
1639  )
1640 
1641  cumulated_entries = cumulated_entries + bin_entries
1642  else:
1643  # empty bin does not add anything to the cumulation
1644  pass
1645 
1646  if i_point >= 0 and i_point < n_points:
1647  x = tgraph.GetX()[i_point]
1648  # x = ROOT.Double()
1649  # y = ROOT.Double()
1650  # tgraph.GetPoint(i_point, x, y)
1651  tgraph.SetPoint(i_point, x, cumulated_content)
1652 
1653  x_error = tgraph.GetErrorX(i_point)
1654  tgraph.SetPointError(i_point, x_error, cumulated_std)
1655  return tgraph
1656 
1657  else:
1658  # Always include the overflow bins.
1659  n_bins = histogram.GetNbinsX()
1660  cumulated_content = 0.0
1661 
1662  i_bins = list(range(0, n_bins + 2))
1663  if not cumulate_forward:
1664  i_bins = reversed(i_bins)
1665 
1666  for i_bin in i_bins:
1667  bin_content = histogram.GetBinContent(i_bin)
1668  cumulated_content += bin_content
1669  histogram.SetBinContent(i_bin, cumulated_content)
1670 
1671  return histogram
1672 
1674  xs,
1675  stackbys=None,
1676  bins=None,
1677  lower_bound=None,
1678  upper_bound=None,
1679  outlier_z_score=None,
1680  include_exceptionals=True,
1681  allow_discrete=False):
1682  """Deducing bin edges from a data series.
1683 
1684  Parameters
1685  ----------
1686  xs : numpy.ndarray (1d)
1687  Data point for which a binning should be found.
1688  stackbys : numpy.ndarray (1d)
1689  Categories of the data points to be distinguishable
1690  bins : list(float) or int or None, optional
1691  Preset bin edges or preset number of desired bins.
1692  The default, None, means the bound should be extracted from data.
1693  The rice rule is used the determine the number of bins.
1694  If a list of floats is given return them immediatly.
1695  lower_bound : float or None, optional
1696  Preset lower bound of the binning range.
1697  The default, None, means the bound should be extracted from data.
1698  upper_bound : float or None, optional
1699  Preset upper bound of the binning range.
1700  The default, None, means the bound should be extracted from data.
1701  outlier_z_score : float or None, optional
1702  Threshold z-score of outlier detection.
1703  The default, None, means no outlier detection.
1704  include_exceptionals : bool, optional
1705  If the outlier detection is active this switch indicates,
1706  if values detected as exceptionally frequent shall be included
1707  nevertheless into the binning range. Default is True,
1708  which means exceptionally frequent values as included
1709  even if they are detected as outliers.
1710 
1711  Returns
1712  -------
1713  np.array (1d), list(str)
1714  Pair of bin edges and labels deduced from the series.
1715  Second element is None if the series is not detected as discrete.
1716  """
1717  debug = get_logger().debug
1718  debug('Determine binning for plot named %s', self.namename)
1719 
1720  if bins == 'flat':
1721  # Special value for the flat distribution binning
1722  n_bins = None
1723 
1724  elif isinstance(bins, collections.Iterable):
1725  # Bins is considered as an array
1726  # Construct a float array forwardable to root.
1727  bin_edges = bins
1728  bin_edges = array.array('d', bin_edges)
1729  bin_labels = None
1730  return bin_edges, bin_labels
1731 
1732  # If bins is not an iterable assume it is the number of bins or None
1733  elif bins is None:
1734  n_bins = None
1735  else:
1736  # Check that bins can be coerced to an integer.
1737  n_bins = int(bins)
1738 
1739  # Do not allow negative bin numbers
1740  if not n_bins > 0:
1741  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1742  raise ValueError(message)
1743 
1744  # Coerce values to a numpy array. Do not copy if already a numpy array.
1745  xs = np.array(xs, copy=False)
1746 
1747  if self.is_binaryis_binary(xs) or (allow_discrete and self.is_discreteis_discrete(xs)):
1748  # This covers also the case
1749  debug('Discrete binning values encountered')
1750  finite_xs = xs[np.isfinite(xs)]
1751  unique_xs = np.unique(finite_xs)
1752 
1753  # Crop the unique values between the lower and upper bound
1754  if lower_bound is None:
1755  if len(unique_xs) == 0:
1756  if upper_bound is None:
1757  lower_bound = 0
1758  else:
1759  lower_bound = upper_bound - 1
1760  else:
1761  lower_bound = np.min(unique_xs)
1762  else:
1763  unique_xs = unique_xs[unique_xs >= lower_bound]
1764 
1765  if upper_bound is None:
1766  if len(unique_xs) == 0:
1767  upper_bound = lower_bound + 1
1768  else:
1769  upper_bound = np.min(unique_xs)
1770  else:
1771  unique_xs = unique_xs[unique_xs <= upper_bound]
1772 
1773  if n_bins is None:
1774  n_bins = len(unique_xs) or 1
1775 
1776  if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1777  # Construct a float array forwardable to root.
1778  bin_edges = array.array('d', unique_xs)
1779  format_bin_label = self.format_bin_labelformat_bin_label
1780  bin_labels = [format_bin_label(value) for value in bin_edges]
1781  bin_edges.append(bin_edges[-1] + 1)
1782  return bin_edges, bin_labels
1783 
1784  else:
1785  # Ambiguous case what to do in case of a number of requested bins
1786  # that is lower than the number of unique values?
1787 
1788  # Continue with an equistant binning for now.
1789  pass
1790 
1791  debug('Lower bound %s', lower_bound)
1792  debug('Upper bound %s', upper_bound)
1793  debug('N bins %s', n_bins)
1794 
1795  else:
1796  bin_range = self.determine_bin_rangedetermine_bin_range(xs,
1797  stackbys=stackbys,
1798  n_bins=n_bins,
1799  lower_bound=lower_bound,
1800  upper_bound=upper_bound,
1801  outlier_z_score=outlier_z_score,
1802  include_exceptionals=include_exceptionals)
1803 
1804  n_bins, lower_bound, upper_bound = bin_range
1805 
1806  n_bin_edges = n_bins + 1
1807  if lower_bound != upper_bound:
1808  if bins == "flat":
1809  debug("Creating flat distribution binning")
1810  precentiles = np.linspace(0.0, 100.0, n_bin_edges)
1811  bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], precentiles))
1812  else:
1813  # Correct the upper bound such that all values are strictly smaller than the upper bound
1814  # Make one step in single precision in the positive direction
1815  bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1816 
1817  # Reinforce the upper and lower bound to be exact
1818  # Also expand the upper bound by an epsilon
1819  # to prevent the highest value in xs from going in the overflow bin
1820  bin_edges[0] = lower_bound
1821  bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1822  debug('Bins %s', bin_edges)
1823 
1824  else:
1825  # Fall back if the array contains only one value
1826  bin_edges = [lower_bound, upper_bound + 1]
1827 
1828  # Construct a float array forwardable to root.
1829  bin_edges = array.array('d', bin_edges)
1830  debug('Bins %s for %s', bin_edges, self.namename)
1831  return bin_edges, None
1832 
1834  xs,
1835  stackbys=None,
1836  n_bins=None,
1837  lower_bound=None,
1838  upper_bound=None,
1839  outlier_z_score=None,
1840  include_exceptionals=True):
1841  """Calculates the number of bins, the lower bound and the upper bound from a given data series
1842  estimating the values that are not given.
1843 
1844  If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1845  The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1846  mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1847  quantile range (IQR).
1848 
1849  If additional include_exceptionals is true the method tries to find exceptional values in the series
1850  and always include them in the range if it finds any.
1851  Exceptional values means exact values that appear often in the series for whatever reason.
1852  Possible reasons include
1853  * Interal / default values
1854  * Failed evaluation conditions
1855  * etc.
1856  which should be not cropped away automatically if you are locking on the quality of your data.
1857 
1858  Parameters
1859  ----------
1860  xs : numpy.ndarray (1d)
1861  Data point for which a binning should be found.
1862  stackbys : numpy.ndarray (1d)
1863  Categories of the data points to be distinguishable
1864  n_bins : int or None, optional
1865  Preset number of desired bins. The default, None, means the bound should be extracted from data.
1866  The rice rule is used the determine the number of bins.
1867  lower_bound : float or None, optional
1868  Preset lower bound of the binning range.
1869  The default, None, means the bound should be extracted from data.
1870  upper_bound : float or None, optional
1871  Preset upper bound of the binning range.
1872  The default, None, means the bound should be extracted from data.
1873  outlier_z_score : float or None, optional
1874  Threshold z-score of outlier detection.
1875  The default, None, means no outlier detection.
1876  include_exceptionals : bool, optional
1877  If the outlier detection is active this switch indicates,
1878  if values detected as exceptionally frequent shall be included
1879  nevertheless into the binning range. Default is True,
1880  which means exceptionally frequent values as included
1881  even if they are detected as outliers.
1882 
1883  Returns
1884  -------
1885  n_bins, lower_bound, upper_bound : int, float, float
1886  A triple of found number of bins, lower bound and upper bound of the binning range.
1887  """
1888 
1889  if stackbys is not None:
1890  unique_stackbys = np.unique(stackbys)
1891  stack_ranges = []
1892  for value in unique_stackbys:
1893  if np.isnan(value):
1894  indices_for_value = np.isnan(stackbys)
1895  else:
1896  indices_for_value = stackbys == value
1897 
1898  stack_lower_bound, stack_upper_bound = \
1899  self.determine_rangedetermine_range(xs[indices_for_value],
1900  lower_bound=lower_bound,
1901  upper_bound=upper_bound,
1902  outlier_z_score=outlier_z_score,
1903  include_exceptionals=include_exceptionals)
1904 
1905  stack_ranges.append([stack_lower_bound, stack_upper_bound])
1906 
1907  lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1908  upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1909 
1910  else:
1911  lower_bound, upper_bound = self.determine_rangedetermine_range(xs,
1912  lower_bound=lower_bound,
1913  upper_bound=upper_bound,
1914  outlier_z_score=outlier_z_score,
1915  include_exceptionals=include_exceptionals)
1916 
1917  if n_bins is None:
1918  # Assume number of bins according to the rice rule.
1919  # The number of data points should not include outliers.
1920  n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1921  rice_n_bins = int(statistics.rice_n_bin(n_data))
1922  n_bins = rice_n_bins
1923 
1924  else:
1925  n_bins = int(n_bins)
1926  # Do not allow negative bin numbers
1927  if not n_bins > 0:
1928  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % n_bins
1929  raise ValueError(message)
1930 
1931  return n_bins, lower_bound, upper_bound
1932 
1934  xs,
1935  lower_bound=None,
1936  upper_bound=None,
1937  outlier_z_score=None,
1938  include_exceptionals=True):
1939  """
1940  Parameters
1941  ----------
1942  xs : numpy.ndarray (1d)
1943  Data point for which a binning should be found.
1944  lower_bound : float or None, optional
1945  Preset lower bound of the binning range.
1946  The default, None, means the bound should be extracted from data.
1947  upper_bound : float or None, optional
1948  Preset upper bound of the binning range.
1949  The default, None, means the bound should be extracted from data.
1950  outlier_z_score : float or None, optional
1951  Threshold z-score of outlier detection.
1952  The default, None, means no outlier detection.
1953  include_exceptionals : bool, optional
1954  If the outlier detection is active this switch indicates,
1955  if values detected as exceptionally frequent shall be included
1956  nevertheless into the binning range. Default is True,
1957  which means exceptionally frequent values as included
1958  even if they are detected as outliers.
1959 
1960  Returns
1961  -------
1962  lower_bound, upper_bound : float, float
1963  A pair of found lower bound and upper bound of series.
1964  """
1965  debug = get_logger().debug
1966 
1967  finite_xs_indices = np.isfinite(xs)
1968  if np.any(finite_xs_indices):
1969  finite_xs = xs[finite_xs_indices]
1970  else:
1971  finite_xs = xs
1972 
1973  make_symmetric = False
1974  exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
1975 
1976  # Look for exceptionally frequent values in the series, e.g. interal delta values like -999
1977  if include_exceptionals or exclude_outliers:
1978  exceptional_xs = self.get_exceptional_valuesget_exceptional_values(finite_xs)
1979  exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1980 
1981  # Prepare for the estimation of outliers
1982  if exclude_outliers:
1983  if not np.all(exceptional_indices):
1984  # Exclude excecptional values from the estimation to be unbiased
1985  # even in case exceptional values fall into the central region near the mean
1986  x_mean, x_std = self.get_robust_mean_and_stdget_robust_mean_and_std(finite_xs[~exceptional_indices])
1987  else:
1988  x_mean, x_std = np.nan, np.nan
1989 
1990  make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
1991 
1992  if include_exceptionals and len(exceptional_xs) != 0:
1993  lower_exceptional_x = np.min(exceptional_xs)
1994  upper_exceptional_x = np.max(exceptional_xs)
1995  make_symmetric = False
1996  else:
1997  lower_exceptional_x = np.nan
1998  upper_exceptional_x = np.nan
1999 
2000  # Find the lower bound, if it is not given.
2001  if lower_bound is None:
2002  try:
2003  lower_bound = np.min(finite_xs)
2004  except ValueError:
2005  lower_bound = -999
2006  # Clip the lower bound by outliers that exceed the given z score
2007  if outlier_z_score is not None:
2008  # The lower bound at which outliers exceed the given z score
2009  lower_outlier_bound = x_mean - outlier_z_score * x_std
2010 
2011  # Clip the lower bound such that it concides with an actual value,
2012  # which prevents empty bins from being produced
2013  indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2014 
2015  if np.any(indices_above_lower_outlier_bound):
2016  lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2017 
2018  # However we want to include at least the exceptional values in the range if there are any.
2019  lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2020 
2021  debug('Lower bound after outlier detection')
2022  debug('Lower bound %s', lower_bound)
2023  debug('Lower outlier bound %s', lower_outlier_bound)
2024 
2025  # Find the upper bound, if it is not given
2026  if upper_bound is None:
2027  try:
2028  upper_bound = np.max(finite_xs)
2029  except ValueError:
2030  upper_bound = 999
2031  if outlier_z_score is not None:
2032  # The upper bound at which outliers exceed the given z score
2033  upper_outlier_bound = x_mean + outlier_z_score * x_std
2034 
2035  # Clip the upper bound such that it concides with an actual value,
2036  # which prevents empty bins from being produced
2037  indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2038 
2039  if np.any(indices_below_upper_outlier_bound):
2040  upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2041 
2042  # However we want to include at least the exceptional values in the range if there are any.
2043  upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2044 
2045  debug('Upper bound after outlier detection')
2046  debug('Upper bound %s', upper_bound)
2047  debug('Upper outlier bound %s', upper_outlier_bound)
2048 
2049  if make_symmetric and lower_bound < 0 and upper_bound > 0:
2050  if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2051  abs_bound = max(abs(lower_bound), abs(upper_bound))
2052  lower_bound = -abs_bound
2053  upper_bound = abs_bound
2054 
2055  return lower_bound, upper_bound
2056 
2057  @classmethod
2058  def set_additional_stats_tf1(cls, histogram):
2059  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2060 
2061  Parameters
2062  ----------
2063  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2064  Something having a GetListOfFunctions method that should hold
2065  the combined fit and additional statistics function.
2066  """
2067  additional_stats_tf1 = cls.create_additional_stats_tf1create_additional_stats_tf1(histogram)
2068  cls.set_tf1set_tf1(histogram, additional_stats_tf1)
2069 
2070  @classmethod
2071  def set_fit_tf1(cls, histogram, fit_tf1):
2072  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2073 
2074  Parameters
2075  ----------
2076  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2077  Something having a GetListOfFunctions method that should hold
2078  the combined fit and additional statistics function.
2079  """
2080  additional_stats_tf1 = cls.create_additional_stats_tf1create_additional_stats_tf1(histogram)
2081  combined_tf1 = cls.combine_fit_and_additional_statscombine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2082  cls.set_tf1set_tf1(histogram, combined_tf1)
2083 
2084  @classmethod
2085  def set_tf1(cls, histogram, tf1):
2086  """Set the attached TF1 of the histogram.
2087 
2088  Parameters
2089  ----------
2090  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2091  Something having a GetListOfFunctions method that should hold
2092  the combined fit and additional statistics function.
2093  """
2094  # Delete any functions formally added
2095  cls.delete_tf1delete_tf1(histogram)
2096  if tf1 is not None:
2097  tf1.SetName("FitAndStats")
2098  histogram.GetListOfFunctions().Add(tf1)
2099 
2100  @classmethod
2101  def delete_tf1(cls, histogram):
2102  """Delete the attached TF1 from the histogram
2103 
2104  Parameters
2105  ----------
2106  histogram : ROOT.TH1 or ROOT.TGraph
2107  Something having a GetListOfFunctions method that holds the fit function
2108  """
2109  tf1 = histogram.FindObject("FitAndStats")
2110  if tf1:
2111  function_list = histogram.GetListOfFunctions()
2112  function_list.Remove(tf1)
2113 
2114  @classmethod
2115  def create_additional_stats_tf1(cls, histogram):
2116  """Create a TF1 with the additional statistics from the histogram as parameters.
2117 
2118  Parameters
2119  ----------
2120  histogram : ROOT.TH1 or ROOT.TGraph
2121  Something having a GetListOfFunctions method that holds the additional statistics.
2122 
2123  Returns
2124  -------
2125  ROOT.TF1
2126  Function with the additional statistics as parameters.
2127  """
2128 
2129  additional_stats = cls.get_additional_statsget_additional_stats(histogram)
2130  if not additional_stats:
2131  return None
2132 
2133  # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2134  # Dummy range to serve the functions
2135  lower_bound = 0
2136  upper_bound = 0
2137 
2138  # Create a formula which is zero in all cases but has space for n parameters
2139  # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2140  formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2141 
2142  # Compose a function that carries the addtional information
2143  additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2144 
2145  for (i, (label, value)) in enumerate(additional_stats.items()):
2146  # root 6 does not like labels with spaces ..
2147  # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2148  # therefor this workaround:
2149  label = label.replace(" ", "-")
2150  additional_stats_tf1.SetParName(i, label)
2151  additional_stats_tf1.FixParameter(i, value)
2152 
2153  return additional_stats_tf1
2154 
2155  @classmethod
2156  def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2157  """Combine the fit function and the function carrying the additional statistics to one function.
2158 
2159  Parameters
2160  ----------
2161  fit_tf1 : ROOT.TF1
2162  The fit function
2163  additional_stats_tf1 : ROOT.TF1
2164  The function carrying the additional statistics as parameters
2165 
2166  Returns
2167  -------
2168  ROOT.TF1
2169  """
2170  if additional_stats_tf1 is None:
2171  return fit_tf1
2172 
2173  # Combine both TF1 functions
2174  # Get the lower and upper bound of the fit
2175  # Use the pass-by reference containers from pyROOT to be able to call the function
2176  lower_bound = ctypes.c_double()
2177  upper_bound = ctypes.c_double()
2178  fit_tf1.GetRange(lower_bound, upper_bound)
2179  title = fit_tf1.GetTitle()
2180 
2181  combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2182  combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound.value, upper_bound.value)
2183  combined_tf1.SetTitle(title)
2184 
2185  # Transfer the fitted parameters
2186  chi2 = fit_tf1.GetChisquare()
2187  combined_tf1.SetChisquare(chi2)
2188 
2189  ndf = fit_tf1.GetNDF()
2190  combined_tf1.SetNDF(ndf)
2191 
2192  n_stats_parameters = additional_stats_tf1.GetNpar()
2193  # n_fit_parameters = fit_tf1.GetNpar()
2194  cls.copy_tf1_parameterscopy_tf1_parameters(additional_stats_tf1, combined_tf1)
2195  cls.copy_tf1_parameterscopy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2196 
2197  return combined_tf1
2198 
2199  @classmethod
2200  def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2201  """Copy the parameters of one TF1 to another.
2202 
2203  Parameters
2204  ----------
2205  tf1_source : ROOT.TF1
2206  Take parameters from here
2207  tf1_target : ROOT.TF1
2208  Copy them to here.
2209  offset : int, optional
2210  Index of the first target parameter to which to copy.
2211  """
2212  n_parameters = tf1_source.GetNpar()
2213 
2214  # Helper variables for pyROOT's mechanism to call functions by reference
2215  lower_bound = ctypes.c_double()
2216  upper_bound = ctypes.c_double()
2217 
2218  for i_source in range(n_parameters):
2219  parameter_name = tf1_source.GetParName(i_source)
2220  i_target = tf1_target.GetParNumber(parameter_name)
2221 
2222  # Workaround for a ROOT bug
2223  if i_target == -1:
2224  for i_target in range(tf1_target.GetNpar()):
2225  if parameter_name == tf1_target.GetParName(i_target):
2226  break
2227  else:
2228  i_target = -1
2229  continue
2230 
2231  tf1_target.SetParameter(i_target,
2232  tf1_source.GetParameter(i_source))
2233  tf1_target.SetParError(i_target,
2234  tf1_source.GetParError(i_source))
2235 
2236  tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2237  tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2238 
2240  """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2241  # Forward the attributes to the plot by auto assignment
2242 
2243  self.checkcheckcheckcheck = self.checkcheckcheckcheck
2244 
2246 
2248 
2249 
2250  self.xlabelxlabelxlabelxlabel = self.xlabelxlabelxlabelxlabel
2251 
2252  self.ylabelylabelylabelylabel = self.ylabelylabelylabelylabel
2253 
2254  self.titletitletitletitle = self.titletitletitletitle
2255 
2256  def set_maximum(self, maximum):
2257  """Sets the maximum of the vertical plotable range"""
2258  for histogram in self.histogramshistograms:
2259  if isinstance(histogram, ROOT.TH1):
2260  histogram.SetMaximum(histogram.GetMaximum(maximum))
2261  else:
2262  histogram.SetMaximum(maximum)
2263 
2264  def set_minimum(self, minimum):
2265  """Sets the minimum of the vertical plotable range"""
2266  for histogram in self.histogramshistograms:
2267  if isinstance(histogram, ROOT.TH1):
2268  histogram.SetMinimum(histogram.GetMinimum(minimum))
2269  else:
2270  histogram.SetMinimum(minimum)
2271 
2272  @classmethod
2273  def set_tstyle(cls):
2274  """Set the style such that the additional stats entries are shown by the TBrowser"""
2275  belle2_validation_style_name = "belle2_validation_style"
2276  belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2277  if not belle2_validation_tstyle:
2278  belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2279 
2280  opt_fit = 112
2281  belle2_validation_tstyle.SetOptFit(opt_fit)
2282 
2283  opt_stat = 111111
2284  belle2_validation_tstyle.SetOptStat(opt_stat)
2285  ROOT.gROOT.SetStyle(belle2_validation_style_name)
2286 
2287  # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2288 
2289  else:
2290  belle2_validation_tstyle.cd()
2291 
2292 
2293 def test():
2294  """Simple test method"""
2295  ValidationPlot.set_tstyle()
2296 
2297  # Test a histogram plot with some nan and inf values
2298  normal_distributed_values = np.random.randn(1000)
2299 
2300  for i in range(10):
2301  normal_distributed_values[i] = np.nan
2302 
2303  for i in range(10, 20):
2304  normal_distributed_values[i] = np.inf
2305 
2306  for i in range(20, 30):
2307  normal_distributed_values[i] = -np.inf
2308 
2309  validation_histogram = ValidationPlot('test_hist')
2310  validation_histogram.hist(normal_distributed_values)
2311  validation_histogram.title = 'A normal distribution'
2312  validation_histogram.xlabel = 'normal'
2313  validation_histogram.ylabel = 'frequency'
2314  validation_histogram.fit_gaus()
2315 
2316  # Test for a cumulated histogram - cumulation from left to right
2317  cumulated_histogram = ValidationPlot('test_cum_hist')
2318  cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2319  cumulated_histogram.title = 'A cumulated normal distribution'
2320  cumulated_histogram.xlabel = 'normal'
2321  cumulated_histogram.ylabel = 'cdf'
2322  cumulated_histogram.show()
2323 
2324  # Test stacked histogram
2325  # Make a random selection of 40%
2326  stackby = np.random.binomial(1.0, 0.40, 1000)
2327  stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2328  stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2329 
2330  # Make a scatterplot with two species.
2331  x = np.random.randn(1000)
2332  y = 3 * np.random.randn(1000)
2333  ones = np.ones_like(x)
2334  angle = np.pi / 2
2335 
2336  x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2337  y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2338 
2339  stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2340  stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2341 
2342  # Make a profile plot with the two species
2343  stacked_validation_profile = ValidationPlot('test_stacked_profile')
2344  stacked_validation_profile.profile(x1, y1, stackby=stackby)
2345 
2346  # Make a two dimensional histogram with two species
2347  stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2348  stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2349 
2350  # Test a profile with a diagonal fit
2351  x = np.linspace(-1, 1, 1000)
2352  y = x.copy()
2353  x[0] = np.nan
2354  diagonal_plot = ValidationPlot('test_diag')
2355  diagonal_plot.profile(x, y, bins=50)
2356  diagonal_plot.fit_line()
2357 
2358  # Test if cumulation works for profile plots
2359  cumulated_profile = ValidationPlot('test_cum_profile')
2360  cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2361 
2362  tfile = ROOT.TFile('test.root', 'RECREATE')
2363 
2364  validation_histogram.write(tfile)
2365 
2366  with root_cd("expert") as tdirectory1:
2367  diagonal_plot.write(tdirectory1)
2368  cumulated_profile.write(tdirectory1)
2369  cumulated_histogram.write(tdirectory1)
2370 
2371  with root_cd("stacked") as tdirectory2:
2372  stacked_validation_histogram.write(tdirectory2)
2373  stacked_validation_scatter.write()
2374  stacked_validation_profile.write()
2375  stacked_validation_hist2d.write()
2376 
2377  tfile.Close()
2378 
2379  tfile = ROOT.TFile('test.root')
2380  tBrowser = ROOT.TBrowser()
2381  tBrowser.BrowseObject(tfile)
2382  input()
2383  tfile.Close()
2384 
2385 
2386 if __name__ == '__main__':
2387  test()
title
cached value of the title for this plot
Definition: plot.py:2254
pvalue_warn
custom levels for pvalue warnings
Definition: plot.py:233
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
Definition: plot.py:252
y_log
Indicator whether the y axes should be displayed as a log scale.
Definition: plot.py:239
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
Definition: plot.py:1091
def create_additional_stats_tf1(cls, histogram)
Definition: plot.py:2115
description
description of the plot
Definition: plot.py:2247
contact
contact information for this plot
Definition: plot.py:2245
upper_bound
upper right corner of the hisogram
Definition: plot.py:544
def write(self, tdirectory=None)
Definition: plot.py:757
def fit_gaus(self, z_score=None)
Definition: plot.py:618
ylabel
default label for the histogram's Y axis
Definition: plot.py:280
int very_sparse_dots_line_style_index
A an index that reference to a dot spacing such that the line is almost invisible for scatter.
Definition: plot.py:182
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True, is_asymmetry=False)
Definition: plot.py:300
def is_discrete(xs, max_n_unique=None)
Definition: plot.py:931
_contact
Contact email address for display on the validation page.
Definition: plot.py:212
pvalue_error
custom levels for pvalue errors
Definition: plot.py:236
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:438
def delete_tf1(cls, histogram)
Definition: plot.py:2101
_title
Title of the validation plot.
Definition: plot.py:221
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
Definition: plot.py:994
def description(self, description)
Definition: plot.py:865
def __init__(self, name, referenceFileName=None)
Definition: plot.py:185
def add_stats_entry(cls, histogram, label, value)
Definition: plot.py:1469
_xlabel
X axes label of the validation plot.
Definition: plot.py:215
xlabel
cached value of the x-axis label for this plot
Definition: plot.py:2250
def set_tf1(cls, histogram, tf1)
Definition: plot.py:2085
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1251
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
Definition: plot.py:2156
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
Definition: plot.py:1055
def set_maximum(self, maximum)
Definition: plot.py:2256
def add_nan_inf_stats(cls, histogram, name, xs)
Definition: plot.py:1443
def set_color(self, tobject, root_i_color)
Definition: plot.py:1232
def cumulate(cls, histogram, cumulation_direction=None)
Definition: plot.py:1575
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
Definition: plot.py:2200
_is_expert
per default all plots are expert and must be set to non-expert explicitly
Definition: plot.py:230
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
Definition: plot.py:685
_description
Description of the plot purpose for display on the validation page.
Definition: plot.py:206
histograms
A list of the histograms that make up the plot.
Definition: plot.py:227
def get_additional_stats(cls, histogram)
Definition: plot.py:1486
_check
Detailed check instructions for display on the validation page.
Definition: plot.py:209
name
A unique name to be used as the name of the ROOT object to be generated.
Definition: plot.py:200
def set_fit_tf1(cls, histogram, fit_tf1)
Definition: plot.py:2071
def set_minimum(self, minimum)
Definition: plot.py:2264
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
Definition: plot.py:1200
plot
The main plot object, may contain one or more (in case of stacked pltos) histograms.
Definition: plot.py:224
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1938
def set_additional_stats_tf1(cls, histogram)
Definition: plot.py:2058
def scatter(self, xs, ys, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:379
_ylabel
Y axes label of the validation plot.
Definition: plot.py:218
referenceFileName
name of the reference file, if not None the binning will be read from there
Definition: plot.py:203
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
Definition: plot.py:1265
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
Definition: plot.py:502
lower_bound
lower left corner of the histogram
Definition: plot.py:542
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1840
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
Definition: plot.py:1681
def gaus_slice_fit(cls, th2, name, z_score=None)
Definition: plot.py:1509
check
cached value of the user-check action for this plot
Definition: plot.py:2243
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1360
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
Definition: plot.py:1123
def fill_into_tgraph(self, graph, xs, ys, filter=None)
Definition: plot.py:1276
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:85