Belle II Software  release-05-02-19
plot.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import sys
5 import math
6 import itertools
7 import collections
8 import array
9 import numpy as np
10 
11 import ROOT
12 
13 import basf2
14 
15 from tracking.root_utils import root_cd, root_save_name
16 
17 from . import statistics
18 
19 import logging
20 
21 
22 def get_logger():
23  """Get for the logging.Logger instance of this module
24 
25  Returns
26  -------
27  logging.Logger
28  Logger instance of this module
29  """
30  return logging.getLogger(__name__)
31 
32 
33 
34 units_by_quantity_name = {
35  'x': 'cm',
36  'y': 'cm',
37  'z': 'cm',
38  'E': 'GeV',
39  'px': 'GeV',
40  'p_{x}': 'GeV',
41  'py': 'GeV',
42  'p_{y}': 'GeV',
43  'pz': 'GeV',
44  'p_{z}': 'GeV',
45  'pt': 'GeV',
46  'p_{t}': 'GeV',
47  'd0': 'cm',
48  'd_{0}': 'cm',
49  'phi0': None,
50  '#phi_{0}': None,
51  'omega': '1/cm',
52  '#omega': '1/cm',
53  'z0': 'cm',
54  'z_{0}': 'cm',
55  'tan_lambda': None,
56  'tan #lambda': None}
57 
58 
59 def get_unit(quantity_name):
60  """Infers the unit of a quantity from its name.
61 
62  Assumes the standard Belle II unit system.
63 
64  Currently looks up the quantity string from units_by_quantity_name.
65 
66  Parameters
67  ----------
68  quantity_name : str
69  Name of a quantity (E.g. pt, x, ...)
70 
71  Returns
72  -------
73  str
74  """
75 
76  unit = units_by_quantity_name.get(quantity_name, None)
77  return unit
78 
79 
80 def compose_axis_label(quantity_name, unit=None):
81  """Formats a quantity name and a unit to a label for a plot axes.
82 
83  If the unit is not given to is tried to infer it
84  from the quantity name by the get_unit function.
85 
86  Parameters
87  ----------
88  quantity_name : str
89  Name of the quantity to be displayed at the axes
90  unit : str, optional
91  The unit of the quantity. Defaults to get_unit(quantity_name)
92 
93  Returns
94  -------
95  str
96  """
97 
98  if unit is None:
99  unit = get_unit(quantity_name)
100 
101  if unit is None:
102  axis_label = quantity_name
103  else:
104  axis_label = '%s (%s)' % (quantity_name, unit)
105 
106  return axis_label
107 
108 
109 def get1DBinningFromReference(name, refFileName):
110  """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
111 
112  @param name : name of the TH1 object to be looked for in the file
113  @param refFileName : name of the reference file where the object is searched for
114 
115  @return int nbin, float xmin, float xmax of the TH1
116  """
117 
118  nbins = None
119  x_min = None
120  x_max = None
121 
122  if refFileName is None or refFileName == "":
123  return nbins, x_min, x_max
124 
125  # store current directory to not confuse directories by opening a TFile
126  oldDirectory = ROOT.gROOT.CurrentDirectory()
127 
128  tfile = ROOT.TFile(refFileName)
129  if tfile.IsOpen():
130  objptr = tfile.Get(name)
131  if objptr and objptr.InheritsFrom("TH1"):
132  nbins = objptr.GetNbinsX()
133  x_min = objptr.GetXaxis().GetXmin()
134  x_max = objptr.GetXaxis().GetXmax()
135  else:
136  basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
137  else:
138  basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
139 
140  tfile.Close()
141 
142  # set current directory back to original one
143  oldDirectory.cd()
144 
145  return nbins, x_min, x_max
146 
147 
148 
149 StatsEntry = ROOT.TParameter(float)
150 
151 
152 class ValidationPlot(object):
153 
154  """Class for generating a validation plot for the Belle II validation page.
155 
156  Typically it generates plots from values stored in numpy arrays and feeds them into
157  plot ROOT classes for storing them.
158 
159  It implements an automatic binning procedure based on the rice rule and
160  robust z score outlier detection.
161 
162  It also keeps track of additional statistics typically neglected by ROOT such as a count
163  for the non finit values such as NaN, +Inf, -Inf.
164 
165  The special attributes for the Belle II validation page like
166  * title
167  * contract
168  * description
169  * check
170  are exposed as properties of this class.
171  """
172 
173 
174  very_sparse_dots_line_style_index = 28
175  # This line style is defined in the set_tstyle method below.
176 
177  def __init__(self, name, referenceFileName=None):
178  """Constructor of the ValidationPlot
179 
180  Parameters
181  ----------
182  name : str
183  A unique name to be used as the name of the ROOT object to be generated
184 
185  referenceFileName : str
186  name of a reference file. If set the code will try to get the histogram or profile
187  from that file and determine the number of bins and upper and lower bound
188  (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
189  """
190 
191 
192  self.name = root_save_name(name)
193 
194 
195  self.referenceFileName = referenceFileName
196 
197 
198  self._description = ''
199 
200 
201  self._check = ''
202 
203 
204  self._contact = ''
205 
206 
207  self._xlabel = ''
208 
209 
210  self._ylabel = ''
211 
212 
213  self._title = ''
214 
215 
216  self.plot = None
217 
218 
219  self.histograms = []
220 
221 
222  self._is_expert = True
223 
224 
225  self.pvalue_warn = None
226 
227 
228  self.pvalue_error = None
229 
230 
231  self.y_log = False
232 
233  def hist(self,
234  xs,
235  weights=None,
236  stackby=None,
237  bins=None,
238  lower_bound=None,
239  upper_bound=None,
240  outlier_z_score=None,
241  include_exceptionals=True,
242  allow_discrete=False,
243  cumulation_direction=None,
244  is_expert=True):
245  """Fill the plot with a one dimensional histogram."""
246 
247  # if referenceFileName was set the binning will taken from there
248  if self.referenceFileName is not None:
249  n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
250  if n is not None and xmin is not None and xmax is not None:
251  bins = n
252  upper_bound = xmax
253  lower_bound = xmin
254 
255  th1_factory = ROOT.TH1D
256  self._is_expert = is_expert
257 
258  self.create_1d(th1_factory,
259  xs,
260  weights=weights,
261  stackby=stackby,
262  bins=bins,
263  lower_bound=lower_bound,
264  upper_bound=upper_bound,
265  outlier_z_score=outlier_z_score,
266  include_exceptionals=include_exceptionals,
267  allow_discrete=allow_discrete,
268  cumulation_direction=cumulation_direction)
269 
270  if not self.ylabel:
271 
272  self.ylabel = 'count'
273 
274  return self
275 
276  def profile(self,
277  xs,
278  ys,
279  weights=None,
280  stackby=None,
281  bins=None,
282  lower_bound=None,
283  upper_bound=None,
284  y_binary=None,
285  y_log=None,
286  outlier_z_score=None,
287  include_exceptionals=True,
288  allow_discrete=False,
289  cumulation_direction=None,
290  gaus_z_score=None,
291  is_expert=True):
292  """Fill the plot with a one dimensional profile of one variable over another."""
293 
294  # if referenceFileName was set the binning will taken from there
295  if self.referenceFileName is not None:
296  n = None
297  xmin = None
298  xmax = None
299  n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
300  if n is not None and xmin is not None and xmax is not None:
301  bins = n
302  upper_bound = xmax
303  lower_bound = xmin
304 
305  th1_factory = ROOT.TProfile
306  self._is_expert = is_expert
307  if gaus_z_score is None:
308  self.create_1d(th1_factory,
309  xs,
310  ys,
311  weights=weights,
312  stackby=stackby,
313  bins=bins,
314  lower_bound=lower_bound,
315  upper_bound=upper_bound,
316  outlier_z_score=outlier_z_score,
317  include_exceptionals=include_exceptionals,
318  allow_discrete=allow_discrete,
319  cumulation_direction=cumulation_direction)
320  else:
321  # Introduce a dummy name for the temporary two dimensional histogram
322  self.name = "_" + self.name
323  self.hist2d(xs, ys=ys, weights=weights, stackby=stackby,
324  bins=(bins, None),
325  lower_bound=(lower_bound, None),
326  upper_bound=(upper_bound, None),
327  outlier_z_score=(outlier_z_score, outlier_z_score),
328  include_exceptionals=(include_exceptionals, True),
329  allow_discrete=(allow_discrete, False),
330  is_expert=is_expert)
331 
332  self.name = self.name[1:]
333  profiles = []
334  for histogram in self.histograms:
335  profile = self.gaus_slice_fit(histogram,
336  name=histogram.GetName()[1:],
337  z_score=gaus_z_score)
338  profiles.append(profile)
339  self.histograms = profiles
340  self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False)
341 
342  if y_log:
343  self.y_log = True
344 
345  if y_binary or self.is_binary(ys):
346  if not self.ylabel:
347  self.ylabel = 'probability'
348 
349  for histogram in self.histograms:
350  histogram.SetMinimum(0)
351  histogram.SetMaximum(1.05)
352 
353  self.plot.SetMinimum(0)
354  self.plot.SetMaximum(1.05)
355 
356  return self
357 
358  def scatter(self,
359  xs,
360  ys,
361  stackby=None,
362  lower_bound=(None, None),
363  upper_bound=(None, None),
364  outlier_z_score=(None, None),
365  include_exceptionals=(True, True),
366  max_n_data=100000,
367  is_expert=True):
368  """Fill the plot with a (unbinned) two dimensional scatter plot"""
369 
370  self._is_expert = is_expert
371 
372  x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
373  x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
374  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
375  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
376 
377  x_lower_bound, x_upper_bound = self.determine_range(
378  xs,
379  lower_bound=x_lower_bound,
380  upper_bound=x_upper_bound,
381  outlier_z_score=x_outlier_z_score,
382  include_exceptionals=x_include_exceptionals
383  )
384 
385  y_lower_bound, y_upper_bound = self.determine_range(
386  ys,
387  lower_bound=y_lower_bound,
388  upper_bound=y_upper_bound,
389  outlier_z_score=y_outlier_z_score,
390  include_exceptionals=y_include_exceptionals
391  )
392 
393  graph = ROOT.TGraph()
394 
395  graph.SetName(self.name)
396  graph.SetMarkerStyle(6)
397  graph.GetHistogram().SetOption("AP") # <-- this has no effect?
398 
399  # Trying to make the graph lines invisible
400  color_index = 0 # white
401  # Transperent white
402  graph.SetLineColorAlpha(color_index, 0)
403  graph.SetLineStyle(self.very_sparse_dots_line_style_index)
404 
405  # Transport the lower and upper bound as ranges of the axis
406  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
407  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
408 
409  self.create(graph,
410  xs=xs,
411  ys=ys,
412  stackby=stackby,
413  reverse_stack=False)
414 
415  return self
416 
417  def grapherrors(self,
418  xs_and_err,
419  ys_and_err,
420  stackby=None,
421  lower_bound=(None, None),
422  upper_bound=(None, None),
423  outlier_z_score=(None, None),
424  include_exceptionals=(True, True),
425  max_n_data=100000,
426  is_expert=True):
427  """Fill the plot with a (unbinned) two dimensional scatter plot
428  xs_and_err and ys_and_err are tuples containing the values and the errors on these values
429  as numpy arrays
430  """
431 
432  self._is_expert = is_expert
433 
434  xs = xs_and_err[0]
435  ys = ys_and_err[0]
436 
437  x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
438  x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
439  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
440  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
441 
442  x_lower_bound, x_upper_bound = self.determine_range(
443  xs,
444  lower_bound=x_lower_bound,
445  upper_bound=x_upper_bound,
446  outlier_z_score=x_outlier_z_score,
447  include_exceptionals=x_include_exceptionals
448  )
449 
450  y_lower_bound, y_upper_bound = self.determine_range(
451  ys,
452  lower_bound=y_lower_bound,
453  upper_bound=y_upper_bound,
454  outlier_z_score=y_outlier_z_score,
455  include_exceptionals=y_include_exceptionals
456  )
457 
458  graph = ROOT.TGraphErrors()
459 
460  graph.SetName(self.name)
461  graph.GetHistogram().SetOption("A")
462 
463  graph.SetMarkerColor(4)
464  graph.SetMarkerStyle(21)
465 
466  # Transport the lower and upper bound as ranges of the axis
467  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
468  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
469 
470  self.create(graph,
471  xs=xs,
472  ys=ys,
473  stackby=stackby,
474  reverse_stack=False)
475 
476  return self
477 
478  def hist2d(self,
479  xs,
480  ys,
481  weights=None,
482  stackby=None,
483  bins=(None, None),
484  lower_bound=(None, None),
485  upper_bound=(None, None),
486  outlier_z_score=(None, None),
487  include_exceptionals=(True, True),
488  allow_discrete=(False, False),
489  quantiles=None,
490  is_expert=True):
491  """Fill the plot with a two dimensional histogram"""
492 
493  name = self.name
494  # Introduce a dummy name for the temporary two dimensional histogram
495  if quantiles is not None:
496  name = "_" + self.name
497 
498  is_expert = self.is_expert
499 
500  x_bins, y_bins = self.unpack_2d_param(bins)
501  x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
502  x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
503  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
504  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
505  x_allow_discrete, y_allow_discrete = self.unpack_2d_param(allow_discrete)
506 
507  if quantiles is not None:
508  y_include_exceptionals = True
509  y_allow_discrete = False
510 
511  x_bin_edges, x_bin_labels = self.determine_bin_edges(xs,
512  stackbys=stackby,
513  bins=x_bins,
514  lower_bound=x_lower_bound,
515  upper_bound=x_upper_bound,
516  outlier_z_score=x_outlier_z_score,
517  include_exceptionals=x_include_exceptionals,
518  allow_discrete=x_allow_discrete)
519 
520  y_bin_edges, y_bin_labels = self.determine_bin_edges(ys,
521  stackbys=stackby,
522  bins=y_bins,
523  lower_bound=y_lower_bound,
524  upper_bound=y_upper_bound,
525  outlier_z_score=y_outlier_z_score,
526  include_exceptionals=y_include_exceptionals,
527  allow_discrete=y_allow_discrete)
528 
529  n_x_bins = len(x_bin_edges) - 1
530  n_y_bins = len(y_bin_edges) - 1
531 
532  self.lower_bound = (x_bin_edges[0], y_bin_edges[0])
533 
534  self.upper_bound = (x_bin_edges[-1], y_bin_edges[-1])
535 
536  histogram = ROOT.TH2D(name,
537  '',
538  n_x_bins,
539  x_bin_edges,
540  n_y_bins,
541  y_bin_edges)
542 
543  if x_bin_labels:
544  get_logger().info("Scatter plot %s is discrete in x.", name)
545  x_taxis = histogram.GetXaxis()
546  for i_x_bin, x_bin_label in enumerate(x_bin_labels):
547  x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
548  self.add_stats_entry(histogram, "dx", 1)
549 
550  else:
551  x_bin_width = x_bin_edges[1] - x_bin_edges[0]
552  self.add_stats_entry(histogram, "dx", x_bin_width)
553 
554  if y_bin_labels:
555  get_logger().info("Scatter plot %s is discrete in y.", name)
556  y_taxis = histogram.GetYaxis()
557  for i_y_bin, y_bin_label in enumerate(y_bin_labels):
558  y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
559  self.add_stats_entry(histogram, "dy", 1)
560 
561  else:
562  y_bin_width = y_bin_edges[1] - y_bin_edges[0]
563  self.add_stats_entry(histogram, "dy", y_bin_width)
564 
565  self.create(histogram, xs, ys=ys, weights=weights, stackby=stackby)
566 
567  if quantiles is not None:
568  self.name = self.name[1:]
569  profiles = []
570  for histogram in self.histograms:
571  for quantile in quantiles:
572  profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
573 
574  # Manually copy labels grumble grumble
575  x_taxis = histogram.GetXaxis()
576  new_x_taxis = profile.GetXaxis()
577  for i_bin in range(x_taxis.GetNbins() + 2):
578  label = x_taxis.GetBinLabel(i_bin)
579  if label != "":
580  new_x_taxis.SetBinLabel(i_bin, label)
581 
582  # Remove faulty error values)
583  epsilon = sys.float_info.epsilon
584  for i_bin in range(0, profile.GetNbinsX() + 2):
585  profile.SetBinError(i_bin, epsilon)
586 
587  profiles.append(profile)
588 
589  self.histograms = profiles
590  self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False, force_graph=True)
591 
592  # Adjust the discrete bins after the filling to be equidistant
593  if x_bin_labels:
594  for histogram in self.histograms:
595  x_taxis = histogram.GetXaxis()
596  x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
597  x_taxis.Set(n_x_bins, x_bin_edges)
598 
599  # Adjust the discrete bins after the filling to be equidistant
600  if y_bin_labels:
601  for histogram in self.histograms:
602  x_taxis = histogram.GetXaxis()
603  y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
604  y_taxis.Set(n_y_bins, y_bin_edges)
605 
606  return self
607 
608  def fit_gaus(self, z_score=None):
609  """Fit a gaus belle curve to the central portion of a one dimensional histogram
610 
611  The fit is applied to the central mean +- z_score * std interval of the histogram,
612  such that it is less influence by non gaussian tails further away than the given z score.
613 
614  @param float z_score number of sigmas to include from the mean value of the histogram.
615  """
616  title = "gaus"
617  formula = "gaus"
618 
619  plot = self.plot
620  if plot is None:
621  raise RuntimeError('Validation plot must be filled before it can be fitted.')
622 
623  if not isinstance(plot, ROOT.TH1D):
624  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
625 
626  histogram = plot
627 
628  fit_tf1 = ROOT.TF1("Fit", formula)
629  fit_tf1.SetTitle(title)
630  fit_tf1.SetParName(0, "n")
631  fit_tf1.SetParName(1, "mean")
632  fit_tf1.SetParName(2, "std")
633 
634  n = histogram.GetSumOfWeights()
635  mean = histogram.GetMean()
636  std = histogram.GetStdDev()
637 
638  fit_tf1.SetParameter(0, n)
639  fit_tf1.SetParameter(1, mean)
640  fit_tf1.SetParameter(2, std)
641 
642  fit_options = "LM"
643  return self.fit(fit_tf1,
644  fit_options,
645  z_score=z_score)
646 
647  def fit_line(self):
648  """Fit a general line to a one dimensional histogram"""
649  title = "line"
650  formula = "x++1"
651  fit_tf1 = ROOT.TF1("Fit", formula)
652  fit_tf1.SetTitle(title)
653  fit_tf1.SetParName(0, "slope")
654  fit_tf1.SetParName(1, "intercept")
655  self.fit(fit_tf1, 'M')
656 
657  def fit_const(self):
658  """Fit a constant function to a one dimensional histogram"""
659  title = "const"
660  formula = "[0]"
661  fit_tf1 = ROOT.TF1("Fit", formula)
662  fit_tf1.SetTitle(title)
663  fit_tf1.SetParName(0, "intercept")
664  self.fit(fit_tf1, 'M')
665 
666  def fit_diag(self):
667  """Fit a diagonal line through the origin to a one dimensional histogram"""
668  title = "diag"
669  formula = "[0]*x"
670  fit_tf1 = ROOT.TF1("Fit", formula)
671  fit_tf1.SetTitle(title)
672  fit_tf1.SetParName(0, "slope")
673  self.fit(fit_tf1, 'M')
674 
675  def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
676  """Fit a user defined function to a one dimensional histogram
677 
678  Parameters
679  ----------
680  formula : str or TF1
681  Formula string or TH1 to be fitted. See TF1 constructurs for that is a valid formula
682  options : str
683  Options string to be used in the fit. See TH1::Fit()
684  lower_bound : float
685  Lower bound of the range to be fitted
686  upper_bound : float
687  Upper bound of the range to be fitted
688  """
689  plot = self.plot
690  if plot is None:
691  raise RuntimeError('Validation plot must be filled before it can be fitted.')
692 
693  if not isinstance(plot, ROOT.TH1D):
694  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
695 
696  histogram = plot
697 
698  xaxis = histogram.GetXaxis()
699  n_bins = xaxis.GetNbins()
700  hist_lower_bound = xaxis.GetBinLowEdge(1)
701  hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
702 
703  if z_score is not None:
704  mean = histogram.GetMean()
705  std = histogram.GetStdDev()
706 
707  if lower_bound is None:
708  lower_bound = mean - z_score * std
709 
710  if upper_bound is None:
711  upper_bound = mean + z_score * std
712 
713  # Setup the plotting range of the function to match the histogram
714  if isinstance(formula, ROOT.TF1):
715  fit_tf1 = formula
716  fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
717  else:
718  fit_tf1 = ROOT.TF1("Fit",
719  formula,
720  hist_lower_bound,
721  hist_upper_bound)
722  get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
723 
724  # Determine fit range
725  if lower_bound is None or lower_bound < hist_lower_bound:
726  lower_bound = hist_lower_bound
727  if upper_bound is None or upper_bound > hist_upper_bound:
728  upper_bound = hist_upper_bound
729 
730  # Make sure the fitted function is not automatically added since we want to do that one our own.
731  # Look for the documentation of TH1::Fit() for details of the options.
732  if 'N' not in options:
733  options += 'N'
734 
735  fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
736 
737  self.set_fit_tf1(histogram, fit_tf1)
738  return fit_res
739 
740  def show(self):
741  """Show the plot."""
742  if self.plot:
743  self.plot.Draw()
744  else:
745  raise ValueError("Can not show a validation plot that has not been filled.")
746 
747  def write(self, tdirectory=None):
748  """Write the plot to file
749 
750  Parameters
751  ----------
752  tdirectory : ROOT.TDirectory, optional
753  ROOT directory to which the plot should be written.
754  If omitted write to the current directory
755  """
756  if not self.plot:
757  raise ValueError("Can not write a validation plot that has not been filled.")
758 
759  with root_cd(tdirectory):
760  ValidationPlot.set_tstyle()
761  if self.plot not in self.histograms:
762  self.plot.Write()
763 
764  # always disable ROOT's stat plot because it hides items
765  meta_options = ["nostats"]
766 
767  # add expert option, if requested
768  if self.is_expert:
769  meta_options.append("expert")
770  else:
771  meta_options.append("shifter")
772 
773  # give a custom pvalue warning / error zone if requested
774  if self.pvalue_error is not None:
775  meta_options.append("pvalue-error={}".format(self.pvalue_error))
776  if self.pvalue_warn is not None:
777  meta_options.append("pvalue-warn={}".format(self.pvalue_warn))
778 
779  # Indicator if the y axes should be displayed as a logarithmic scale
780  if self.y_log:
781  meta_options.append("logy")
782 
783  meta_options_str = ",".join(meta_options)
784 
785  for histogram in self.histograms:
786  histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
787  histogram.Write()
788 
789  @property
790  def is_expert(self):
791  """Getter method if an plot plot is marked as expert plot"""
792  return self._is_expert
793 
794  @property
795  def title(self):
796  """Getter for the plot title"""
797  return self._title
798 
799  @title.setter
800  def title(self, title):
801  """Setter for the plot title"""
802  self._title = title
803  if self.plot:
804  self.plot.SetTitle(title)
805  for histogram in self.histograms:
806  histogram.SetTitle(title)
807 
808  @property
809  def xlabel(self):
810  """Getter for the axis label at the x axis"""
811  return self._xlabel
812 
813  @xlabel.setter
814  def xlabel(self, xlabel):
815  """Setter for the axis label at the x axis"""
816  self._xlabel = xlabel
817  for histogram in self.histograms:
818  histogram.GetXaxis().SetTitle(xlabel)
819 
820  @property
821  def ylabel(self):
822  """Getter for the axis label at the y axis"""
823  return self._ylabel
824 
825  @ylabel.setter
826  def ylabel(self, ylabel):
827  """Setter for the axis label at the y axis"""
828  self._ylabel = ylabel
829  for histogram in self.histograms:
830  histogram.GetYaxis().SetTitle(ylabel)
831 
832  @property
833  def contact(self):
834  """Getter for the contact email address to be displayed on the validation page"""
835  return self._contact
836 
837  @contact.setter
838  def contact(self, contact):
839  """Setter for the contact email address to be displayed on the validation page"""
840  self._contact = contact
841  for histogram in self.histograms:
842  found_obj = histogram.FindObject('Contact')
843  if not found_obj:
844  tnamed = ROOT.TNamed("Contact", contact)
845  histogram.GetListOfFunctions().Add(tnamed)
846  found_obj = histogram.FindObject('Contact')
847  found_obj.SetTitle(contact)
848 
849  @property
850  def description(self):
851  """Getter for the description to be displayed on the validation page"""
852  return self._description
853 
854  @description.setter
855  def description(self, description):
856  """Setter for the description to be displayed on the validation page"""
857  self._description = description
858  for histogram in self.histograms:
859  found_obj = histogram.FindObject('Description')
860  if not found_obj:
861  tnamed = ROOT.TNamed("Description", description)
862  histogram.GetListOfFunctions().Add(tnamed)
863  found_obj = histogram.FindObject('Description')
864  found_obj.SetTitle(description)
865 
866  @property
867  def check(self):
868  """Getter for the check to be displayed on the validation page"""
869  return self._check
870 
871  @check.setter
872  def check(self, check):
873  """Setter for the check to be displayed on the validation page"""
874  self._check = check
875  for histogram in self.histograms:
876  found_obj = histogram.FindObject('Check')
877  if not found_obj:
878  tnamed = ROOT.TNamed("Check", check)
879  histogram.GetListOfFunctions().Add(tnamed)
880  found_obj = histogram.FindObject('Check')
881  found_obj.SetTitle(check)
882 
883  # Implementation details #
884  # ###################### #
885 
886  @staticmethod
887  def unpack_2d_param(param):
888  """Unpacks a function parameter for the two dimensional plots.
889 
890  If it is a pair the first parameter shall apply to the x coordinate
891  the second to the y coordinate. In this case the pair is returned as two values
892 
893  If something else is given the it is assumed that this parameter should equally apply
894  to both coordinates. In this case the same values is return twice as a pair.
895 
896  Parameters
897  ----------
898  param : pair or single value
899  Function parameter for a two dimensional plot
900 
901  Returns
902  -------
903  pair
904  A pair of values being the parameter for the x coordinate and
905  the y coordinate respectively
906  """
907  try:
908  if len(param) == 2:
909  x_param, y_param = param
910  except TypeError:
911  x_param = param
912  y_param = param
913  return x_param, y_param
914 
915  @staticmethod
916  def is_binary(xs):
917  """Determine if the data consists of boolean values"""
918  return statistics.is_binary_series(xs)
919 
920  @staticmethod
921  def is_discrete(xs, max_n_unique=None):
922  """Determine if the data consists of discrete values"""
923  return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
924 
925  @staticmethod
927  """Find exceptionally frequent values
928 
929  Parameters
930  ----------
931  xs : np.array (1d)
932  Data series
933 
934  Returns
935  -------
936  np.array (1d)
937  A list of the found exceptional values.
938  """
939  return statistics.rice_exceptional_values(xs)
940 
941  @staticmethod
943  """Does an estimation of mean and standard deviation robust against outliers.
944 
945  Parameters
946  ----------
947  xs : np.array (1d)
948  Data series
949 
950  Returns
951  -------
952  float, float
953  Pair of mean and standard deviation
954  """
955  x_mean = statistics.truncated_mean(xs)
956  x_std = statistics.trimmed_std(xs)
957  return x_mean, x_std
958 
959  @staticmethod
960  def format_bin_label(value):
961  """Formats a value to be placed at a tick on an axis."""
962  if np.isfinite(value) and value == np.round(value):
963  return str(int(value))
964  else:
965  formated_value = "{:.5g}".format(value)
966 
967  # if the label is to long, switch to shorter "e" format
968  if len(formated_value) > 8:
969  formated_value = "{:.3e}".format(value)
970  return formated_value
971 
972  def create_1d(self,
973  th1_factory,
974  xs,
975  ys=None,
976  weights=None,
977  bins=None,
978  stackby=None,
979  lower_bound=None,
980  upper_bound=None,
981  outlier_z_score=None,
982  include_exceptionals=True,
983  allow_discrete=False,
984  cumulation_direction=None):
985  """Combined factory method for creating a one dimensional histogram or a profile plot."""
986  name = self.name
987 
988  # Coerce values to a numpy array. Do not copy if already a numpy array.
989  xs = np.array(xs, copy=False)
990 
991  if ys is not None:
992  ys = np.array(ys, copy=False)
993 
994  if weights is not None:
995  weights = np.array(weights, copy=False)
996 
997  bin_edges, bin_labels = self.determine_bin_edges(xs,
998  stackbys=stackby,
999  bins=bins,
1000  lower_bound=lower_bound,
1001  upper_bound=upper_bound,
1002  outlier_z_score=outlier_z_score,
1003  include_exceptionals=include_exceptionals,
1004  allow_discrete=allow_discrete)
1005 
1006  n_bins = len(bin_edges) - 1
1007  self.lower_bound = bin_edges[0]
1008  self.upper_bound = bin_edges[-1]
1009  histogram = th1_factory(name, '', n_bins, bin_edges)
1010 
1011  if bin_labels:
1012  get_logger().info("One dimensional plot %s is discrete in x.", name)
1013  x_taxis = histogram.GetXaxis()
1014  for i_bin, bin_label in enumerate(bin_labels):
1015  x_taxis.SetBinLabel(i_bin + 1, bin_label)
1016  self.add_stats_entry(histogram, "dx", 1)
1017 
1018  else:
1019  bin_width = bin_edges[1] - bin_edges[0]
1020  self.add_stats_entry(histogram, "dx", bin_width)
1021 
1022  self.create(histogram,
1023  xs,
1024  ys=ys,
1025  weights=weights,
1026  stackby=stackby,
1027  cumulation_direction=cumulation_direction,
1028  reverse_stack=True)
1029  # Reverse the stack to have the signal distribution at the bottom
1030 
1031  # Adjust the discrete bins after the filling to be equidistant
1032  if bin_labels:
1033  for histogram in self.histograms:
1034  x_taxis = histogram.GetXaxis()
1035  bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1036  x_taxis.Set(n_bins, bin_edges)
1037 
1038  def create(self,
1039  histogram_template,
1040  xs,
1041  ys=None,
1042  weights=None,
1043  stackby=None,
1044  cumulation_direction=None,
1045  reverse_stack=None):
1046  """Create histograms from a template, possibly stacked"""
1047 
1048  histograms = []
1049 
1050  if stackby is None:
1051  histogram = histogram_template
1052  self.fill_into(histogram, xs, ys, weights=weights)
1053  if cumulation_direction is not None:
1054  histogram = self.cumulate(histogram, cumulation_direction=cumulation_direction)
1055 
1056  histograms.append(histogram)
1057  plot = histogram
1058 
1059  else:
1060  stackby = np.array(stackby, copy=False)
1061  name = histogram_template.GetName()
1062 
1063  histograms = self.fill_into_grouped(histogram_template,
1064  xs,
1065  ys=ys,
1066  weights=weights,
1067  groupbys=stackby,
1068  groupby_label="stack")
1069 
1070  if cumulation_direction is not None:
1071  histograms = [self.cumulate(histogram, cumulation_direction=cumulation_direction)
1072  for histogram in histograms]
1073 
1074  plot = self.create_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1075 
1076  self.histograms = histograms
1077  self.plot = plot
1078  self.attach_attributes()
1079 
1080  @classmethod
1081  def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1082  """Create a stack of histograms"""
1083  if len(histograms) == 1:
1084  plot = histograms[0]
1085  else:
1086  if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1087  plot = ROOT.TMultiGraph()
1088  else:
1089  plot = ROOT.THStack()
1090 
1091  plot.SetName(name)
1092 
1093  # Add the histogram in reverse order such
1094  # that the signal usually is on the bottom an well visible
1095  if reverse_stack:
1096  for histogram in reversed(histograms):
1097  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1098  histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1099  plot.Add(histogram, "APZ")
1100  else:
1101  plot.Add(histogram)
1102  else:
1103  for histogram in histograms:
1104  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1105  histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1106  plot.Add(histogram, "APZ")
1107  else:
1108  plot.Add(histogram)
1109 
1110  return plot
1111 
1112  @classmethod
1113  def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1114  """Extract errors from a TProfile histogram and create a TGraph from these"""
1115  if isinstance(tprofile, ROOT.TGraph):
1116  return tprofile
1117 
1118  x_taxis = tprofile.GetXaxis()
1119  n_bins = x_taxis.GetNbins()
1120 
1121  bin_ids_with_underflow = list(range(n_bins + 1))
1122  bin_ids_without_underflow = list(range(1, n_bins + 1))
1123 
1124  bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1125  if abs_x:
1126  bin_centers = np.abs(bin_centers)
1127  bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1128  bin_x_errors = bin_widths / 2.0
1129 
1130  # Now for the histogram content not including the underflow
1131  bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1132  bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1133 
1134  tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1135 
1136  tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1137 
1138  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1139  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1140 
1141  # Copy all functions and stats entries
1142  for tobject in tprofile.GetListOfFunctions():
1143  tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1144 
1145  # Add stats entries that are displayed for profile plots
1146  cls.add_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1147 
1148  stats_values = np.array([np.nan] * 6)
1149  tprofile.GetStats(stats_values)
1150 
1151  sum_w = stats_values[0]
1152  sum_w2 = stats_values[1]
1153  sum_wx = stats_values[2]
1154  sum_wx2 = stats_values[3]
1155  sum_wy = stats_values[4]
1156  sum_wy2 = stats_values[5]
1157 
1158  cls.add_stats_entry(tgrapherrors,
1159  "x avg",
1160  sum_wx / sum_w)
1161 
1162  cls.add_stats_entry(tgrapherrors,
1163  "x std",
1164  np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1165 
1166  cls.add_stats_entry(tgrapherrors,
1167  "y avg",
1168  sum_wy / sum_w)
1169 
1170  cls.add_stats_entry(tgrapherrors,
1171  "y std",
1172  np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1173 
1174  cls.add_stats_entry(tgrapherrors,
1175  'cov',
1176  tgrapherrors.GetCovariance())
1177 
1178  cls.add_stats_entry(tgrapherrors,
1179  'corr',
1180  tgrapherrors.GetCorrelationFactor())
1181 
1182  return tgrapherrors
1183 
1185  histogram_template,
1186  xs,
1187  ys=None,
1188  weights=None,
1189  groupbys=None,
1190  groupby_label="group"):
1191  """Fill data into similar histograms in groups indicated by a groupby array"""
1192 
1193  histograms = []
1194  unique_groupbys = np.unique(groupbys)
1195  name = histogram_template.GetName()
1196 
1197  for i_value, value in enumerate(unique_groupbys):
1198  if np.isnan(value):
1199  indices_for_value = np.isnan(groupbys)
1200  else:
1201  indices_for_value = groupbys == value
1202 
1203  # Make a copy of the empty histogram
1204  histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1205  i_root_color = i_value + 1
1206 
1207  self.set_color(histogram_for_value, i_root_color)
1208 
1209  if groupby_label:
1210  self.add_stats_entry(histogram_for_value, groupby_label, value)
1211 
1212  self.fill_into(histogram_for_value,
1213  xs,
1214  ys=ys,
1215  weights=weights,
1216  filter=indices_for_value)
1217 
1218  histograms.append(histogram_for_value)
1219 
1220  return histograms
1221 
1222  def set_color(self, tobject, root_i_color):
1223  """Set the color of the ROOT object.
1224 
1225  By default the line color of a TGraph should be invisible, so do not change it
1226  For other objects set the marker and the line color
1227 
1228  Parameters
1229  ----------
1230  tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1231  Object of which the color should be set.
1232  root_i_color : int
1233  Color index of the ROOT color table
1234  """
1235  if isinstance(tobject, ROOT.TGraph):
1236  tobject.SetMarkerColor(root_i_color)
1237  else:
1238  tobject.SetLineColor(root_i_color)
1239  tobject.SetMarkerColor(root_i_color)
1240 
1241  def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1242  """Fill the data into the plot object"""
1243  if isinstance(plot, ROOT.TGraph):
1244  if ys is None:
1245  raise ValueError("ys are required for filling a graph")
1246  self.fill_into_tgraph(plot, xs, ys, filter=filter)
1247  elif isinstance(plot, ROOT.TGraphErrors):
1248  if ys is None:
1249  raise ValueError("ys are required for filling a graph error")
1250 
1251  self.fill_into_tgrapherror(plot, xs, ys)
1252  else:
1253  self.fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1254 
1255  def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1256  """fill point values and error of the x and y axis into the graph"""
1257 
1258  assert(len(xs[0]) == len(ys[0]))
1259  # set the overall amount of points
1260  graph.Set(len(xs[0]))
1261 
1262  for i in range(len(xs[0])):
1263  graph.SetPoint(i, xs[0][i], ys[0][i])
1264  graph.SetPointError(i, xs[1][i], ys[1][i])
1265 
1266  def fill_into_tgraph(self, graph, xs, ys, filter=None):
1267  """Fill the data into a TGraph"""
1268 
1269  # Save some ifs by introducing a dummy slicing as a non filter
1270  if filter is None:
1271  filter = slice(None)
1272 
1273  xs = xs[filter]
1274  ys = ys[filter]
1275 
1276  max_n_data = 100000
1277  x_n_data = len(xs)
1278  y_n_data = len(ys)
1279 
1280  if max_n_data:
1281  if x_n_data > max_n_data or y_n_data > max_n_data:
1282  get_logger().warning("Number of points in scatter graph %s exceed limit %s" %
1283  (self.name, max_n_data))
1284 
1285  get_logger().warning("Cropping %s" % max_n_data)
1286 
1287  xs = xs[0:max_n_data]
1288  ys = ys[0:max_n_data]
1289 
1290  x_axis = graph.GetXaxis()
1291  y_axis = graph.GetYaxis()
1292 
1293  x_lower_bound = x_axis.GetXmin()
1294  x_upper_bound = x_axis.GetXmax()
1295 
1296  y_lower_bound = y_axis.GetXmin()
1297  y_upper_bound = y_axis.GetXmax()
1298 
1299  x_underflow_indices = xs < x_lower_bound
1300  x_overflow_indices = xs > x_upper_bound
1301 
1302  y_underflow_indices = ys < y_lower_bound
1303  y_overflow_indices = ys > y_upper_bound
1304 
1305  plot_indices = ~(np.isnan(xs) |
1306  x_underflow_indices |
1307  x_overflow_indices |
1308  np.isnan(ys) |
1309  y_underflow_indices |
1310  y_overflow_indices)
1311 
1312  n_plot_data = np.sum(plot_indices)
1313  plot_xs = xs[plot_indices]
1314  plot_ys = ys[plot_indices]
1315 
1316  graph.Set(int(n_plot_data))
1317  for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1318  graph.SetPoint(i, x, y)
1319 
1320  self.add_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1321 
1322  self.add_nan_inf_stats(graph, 'x', xs)
1323  self.add_nan_inf_stats(graph, 'y', ys)
1324 
1325  x_n_underflow = np.sum(x_underflow_indices)
1326  if x_n_underflow:
1327  self.add_stats_entry(graph, 'x underf.', x_n_underflow)
1328 
1329  x_n_overflow = np.sum(x_overflow_indices)
1330  if x_n_overflow:
1331  self.add_stats_entry(graph, 'x overf.', x_n_overflow)
1332 
1333  y_n_underflow = np.sum(y_underflow_indices)
1334  if y_n_underflow:
1335  self.add_stats_entry(graph, 'y underf.', y_n_underflow)
1336 
1337  y_n_overflow = np.sum(y_overflow_indices)
1338  if y_n_overflow:
1339  self.add_stats_entry(graph, 'y overf.', y_n_overflow)
1340 
1341  self.add_stats_entry(graph, 'x avg', graph.GetMean(1))
1342  self.add_stats_entry(graph, 'x std', graph.GetRMS(1))
1343 
1344  self.add_stats_entry(graph, 'y avg', graph.GetMean(2))
1345  self.add_stats_entry(graph, 'y std', graph.GetRMS(2))
1346 
1347  self.add_stats_entry(graph, 'cov', graph.GetCovariance())
1348  self.add_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1349 
1350  def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1351  """Fill the histogram blocking non finite values
1352 
1353  Parameters
1354  ----------
1355  histogram : ROOT.TH1
1356  The histogram to be filled
1357  xs : numpy.ndarray (1d)
1358  Data for the first axes
1359  ys : numpy.ndarray (1d), optional
1360  Data for the second axes
1361  weights : numpy.ndarray (1d), optional
1362  Weight of the individual points. Defaults to one for each
1363  filter : numpy.ndarray, optional
1364  Boolean index array indicating which entries shall be taken.
1365  """
1366  # Save some ifs by introducing a dummy slicing as a non filter
1367  if filter is None:
1368  filter = slice(None)
1369 
1370  xs = xs[filter]
1371  # Count the nan and inf values in x
1372  self.add_nan_inf_stats(histogram, 'x', xs)
1373  finite_filter = np.isfinite(xs)
1374 
1375  if ys is not None:
1376  ys = ys[filter]
1377  # Count the nan and inf values in y
1378  self.add_nan_inf_stats(histogram, 'y', ys)
1379  finite_filter &= np.isfinite(ys)
1380 
1381  if weights is None:
1382  xs = xs[finite_filter]
1383  weights = np.ones_like(xs)
1384  else:
1385  weights = weights[filter]
1386  self.add_nan_inf_stats(histogram, 'w', weights)
1387  finite_filter &= np.isfinite(weights)
1388  xs = xs[finite_filter]
1389  weights[finite_filter]
1390 
1391  if ys is not None:
1392  ys = ys[finite_filter]
1393 
1394  # Now fill the actual histogram
1395  try:
1396  histogram.FillN
1397  except AttributeError:
1398  Fill = histogram.Fill
1399  if ys is None:
1400  fill = np.frompyfunc(Fill, 2, 1)
1401  fill(xs.astype(np.float64, copy=False),
1402  weights.astype(np.float64, copy=False))
1403  else:
1404  fill = np.frompyfunc(Fill, 3, 1)
1405  fill(xs.astype(np.float64, copy=False),
1406  ys.astype(np.float64, copy=False),
1407  weights.astype(np.float64, copy=False))
1408  else:
1409  if ys is None:
1410  # Make the array types compatible with the ROOT interface if necessary
1411  xs = xs.astype(np.float64, copy=False)
1412  weights = weights.astype(np.float64, copy=False)
1413  n = len(xs)
1414  if n != 0:
1415  histogram.FillN(n, xs, weights)
1416  else:
1417  basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1418 
1419  else:
1420  # Make the array types compatible with the ROOT interface if necessary
1421  xs = xs.astype(np.float64, copy=False)
1422  ys = ys.astype(np.float64, copy=False)
1423  weights = weights.astype(np.float64, copy=False)
1424  n = len(xs)
1425  if n != 0:
1426  histogram.FillN(n, xs, ys, weights)
1427  else:
1428  basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1429 
1430  self.set_additional_stats_tf1(histogram)
1431 
1432  @classmethod
1433  def add_nan_inf_stats(cls, histogram, name, xs):
1434  """ Extracts the counts of non finite floats from a series
1435  and adds them as additional statistics to the histogram.
1436 
1437  Parameters
1438  ----------
1439  histogram : derived from ROOT.TH1 or ROOT.TGraph
1440  Something having a GetListOfFunctions method that
1441  name : str
1442  A label for the data series to be prefixed to the entries.
1443  xs : numpy.ndarray (1d)
1444  Data from which the non finit floats should be counted.
1445  """
1446  n_nans = np.isnan(xs).sum()
1447  if n_nans > 0:
1448  cls.add_stats_entry(histogram, name + ' nan', n_nans)
1449 
1450  n_positive_inf = np.sum(xs == np.inf)
1451  if n_positive_inf > 0:
1452  cls.add_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1453 
1454  n_negative_inf = np.sum(xs == -np.inf)
1455  if n_negative_inf > 0:
1456  cls.add_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1457 
1458  @classmethod
1459  def add_stats_entry(cls, histogram, label, value):
1460  """Add a new additional statistics to the histogram.
1461 
1462  Parameters
1463  ----------
1464  histogram : derived from ROOT.TH1 or ROOT.TGraph
1465  Something having a GetListOfFunctions method that holds the additional statistics
1466  label : str
1467  Label of the statistic
1468  value : float
1469  Value of the statistic
1470  """
1471  stats_entry = StatsEntry(str(label), float(value))
1472  histogram.GetListOfFunctions().Add(stats_entry)
1473  cls.set_additional_stats_tf1(histogram)
1474 
1475  @classmethod
1476  def get_additional_stats(cls, histogram):
1477  """Get the additional statistics from the histogram and return them a dict.
1478 
1479  Parameters
1480  ----------
1481  histogram : derived from ROOT.TH1 or ROOT.TGraph
1482  Something having a GetListOfFunctions method that holds the additional statistics
1483 
1484  Returns
1485  -------
1486  collection.OrderedDict
1487  A map of labels to values for the additional statistics
1488  """
1489  additional_stats = collections.OrderedDict()
1490  for tobject in histogram.GetListOfFunctions():
1491  if isinstance(tobject, StatsEntry):
1492  stats_entry = tobject
1493  label = stats_entry.GetName()
1494  value = stats_entry.GetVal()
1495  additional_stats[label] = value
1496  return additional_stats
1497 
1498  @classmethod
1499  def gaus_slice_fit(cls, th2, name, z_score=None):
1500  """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1501  # profile = th2.ProfileX(name)
1502 
1503  y_taxis = th2.GetYaxis()
1504  th2_lower_bound = y_taxis.GetXmin()
1505  th2_upper_bound = y_taxis.GetXmax()
1506  th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1507  n_y_bins = y_taxis.GetNbins()
1508  if z_score:
1509  y_mean = th2.GetMean(2)
1510  y_std = th2.GetStdDev(2)
1511  fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1512  fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1513  fit_height = fit_upper_bound - fit_lower_bound
1514  # Require all covered bins to be filled
1515  required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1516  else:
1517  fit_lower_bound = th2_lower_bound
1518  fit_upper_bound = th2_upper_bound
1519  fit_height = fit_upper_bound - fit_lower_bound
1520  required_n_bins_inslice_filled = n_y_bins / 1.61
1521 
1522  # Highest required number of bins should be a third
1523  required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1524 
1525  fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1526  fit_tf1.SetParName(0, "n")
1527  fit_tf1.SetParName(1, "mean")
1528  fit_tf1.SetParName(2, "std")
1529  i_first_bin = 0
1530  i_last_bin = -1
1531  fit_options = "QNR"
1532  param_fit_th1s = ROOT.TObjArray()
1533  th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1534  int(required_n_bins_inslice_filled),
1535  fit_options, param_fit_th1s)
1536 
1537  th1_means = param_fit_th1s.At(1)
1538  th1_means.SetName(name)
1539  th1_means.SetTitle(th2.GetTitle())
1540  for label, value in cls.get_additional_stats(th2).items():
1541  cls.add_stats_entry(th1_means, label, value)
1542 
1543  # Manually copy labels grumble grumble
1544  x_taxis = th2.GetXaxis()
1545  new_x_taxis = th1_means.GetXaxis()
1546  for i_bin in range(x_taxis.GetNbins() + 2):
1547  label = x_taxis.GetBinLabel(i_bin)
1548  if label != "":
1549  new_x_taxis.SetBinLabel(i_bin, label)
1550 
1551  # Adjust plot bound to reflect the fit range.
1552  data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1553  data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1554  data_height = data_upper_bound - data_lower_bound
1555 
1556  plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1557  plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1558 
1559  th1_means.SetMinimum(plot_lower_bound)
1560  th1_means.SetMaximum(plot_upper_bound)
1561 
1562  return th1_means
1563 
1564  @classmethod
1565  def cumulate(cls, histogram, cumulation_direction=None):
1566  """Cumulates the histogram inplace.
1567 
1568  Parameters
1569  ----------
1570  histogram : ROOT.TH1 or ROOT.TProfile
1571  Filled histogram to be cumulated
1572  cumulation_direction : int, optional
1573  Direction is indicated by the sign.
1574  Positive means from left to right, negative means from right to left.
1575  If now cumulation direction is given return the histogram as is.
1576 
1577  Returns
1578  -------
1579  ROOT.TH1
1580  Cumulated histogram potentially altered inplace.
1581  """
1582  if not cumulation_direction:
1583  return histogram
1584 
1585  cumulate_backward = cumulation_direction < 0
1586  cumulate_forward = not cumulate_backward
1587 
1588  if isinstance(histogram, ROOT.TH2):
1589  raise ValueError("Cannot cumulate a two dimensional histogram.")
1590 
1591  if isinstance(histogram, ROOT.TH3):
1592  raise ValueError("Cannot cumulate a three dimensional histogram.")
1593 
1594  if not isinstance(histogram, ROOT.TH1):
1595  raise ValueError("Can only cumulate a one dimensional histogram.")
1596 
1597  if isinstance(histogram, ROOT.TProfile):
1598  tprofile = histogram
1599  # Turn the profile histogram into graph where we can set the new content and errors
1600  tgraph = cls.convert_tprofile_to_tgrapherrors(histogram)
1601  tgraph.SetName(tprofile.GetName())
1602 
1603  n_bins = histogram.GetNbinsX()
1604  n_points = n_bins
1605  cumulated_content = 0.0
1606  cumulated_entries = 0
1607  cumulated_std = 0.0
1608 
1609  # Always include the overflow bins.
1610  i_bins = list(range(0, n_bins + 2))
1611  if not cumulate_forward:
1612  i_bins = reversed(i_bins)
1613 
1614  for i_bin in i_bins:
1615  i_point = i_bin - 1
1616  bin_content = tprofile.GetBinContent(i_bin)
1617  bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1618  bin_std = tprofile.GetBinError(i_bin)
1619 
1620  if bin_entries != 0:
1621  cumulated_content = (
1622  1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1623  (cumulated_entries + bin_entries)
1624  )
1625 
1626  cumulated_std = (
1627  math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1628  (cumulated_entries + bin_entries)
1629  )
1630 
1631  cumulated_entries = cumulated_entries + bin_entries
1632  else:
1633  # empty bin does not add anything to the cumulation
1634  pass
1635 
1636  if i_point >= 0 and i_point < n_points:
1637  x = tgraph.GetX()[i_point]
1638  # x = ROOT.Double()
1639  # y = ROOT.Double()
1640  # tgraph.GetPoint(i_point, x, y)
1641  tgraph.SetPoint(i_point, x, cumulated_content)
1642 
1643  x_error = tgraph.GetErrorX(i_point)
1644  tgraph.SetPointError(i_point, x_error, cumulated_std)
1645  return tgraph
1646 
1647  else:
1648  # Always include the overflow bins.
1649  n_bins = histogram.GetNbinsX()
1650  cumulated_content = 0.0
1651 
1652  i_bins = list(range(0, n_bins + 2))
1653  if not cumulate_forward:
1654  i_bins = reversed(i_bins)
1655 
1656  for i_bin in i_bins:
1657  bin_content = histogram.GetBinContent(i_bin)
1658  cumulated_content += bin_content
1659  histogram.SetBinContent(i_bin, cumulated_content)
1660 
1661  return histogram
1662 
1664  xs,
1665  stackbys=None,
1666  bins=None,
1667  lower_bound=None,
1668  upper_bound=None,
1669  outlier_z_score=None,
1670  include_exceptionals=True,
1671  allow_discrete=False):
1672  """Deducing bin edges from a data series.
1673 
1674  Parameters
1675  ----------
1676  xs : numpy.ndarray (1d)
1677  Data point for which a binning should be found.
1678  stackbys : numpy.ndarray (1d)
1679  Categories of the data points to be distinguishable
1680  bins : list(float) or int or None, optional
1681  Preset bin edges or preset number of desired bins.
1682  The default, None, means the bound should be extracted from data.
1683  The rice rule is used the determine the number of bins.
1684  If a list of floats is given return them immediatly.
1685  lower_bound : float or None, optional
1686  Preset lower bound of the binning range.
1687  The default, None, means the bound should be extracted from data.
1688  upper_bound : float or None, optional
1689  Preset upper bound of the binning range.
1690  The default, None, means the bound should be extracted from data.
1691  outlier_z_score : float or None, optional
1692  Threshold z-score of outlier detection.
1693  The default, None, means no outlier detection.
1694  include_exceptionals : bool, optional
1695  If the outlier detection is active this switch indicates,
1696  if values detected as exceptionally frequent shall be included
1697  nevertheless into the binning range. Default is True,
1698  which means exceptionally frequent values as included
1699  even if they are detected as outliers.
1700 
1701  Returns
1702  -------
1703  np.array (1d), list(str)
1704  Pair of bin edges and labels deduced from the series.
1705  Second element is None if the series is not detected as discrete.
1706  """
1707  debug = get_logger().debug
1708  debug('Determine binning for plot named %s', self.name)
1709 
1710  if bins == 'flat':
1711  # Special value for the flat distribution binning
1712  n_bins = None
1713 
1714  elif isinstance(bins, collections.Iterable):
1715  # Bins is considered as an array
1716  # Construct a float array forwardable to root.
1717  bin_edges = bins
1718  bin_edges = array.array('d', bin_edges)
1719  bin_labels = None
1720  return bin_edges, bin_labels
1721 
1722  # If bins is not an iterable assume it is the number of bins or None
1723  elif bins is None:
1724  n_bins = None
1725  else:
1726  # Check that bins can be coerced to an integer.
1727  n_bins = int(bins)
1728 
1729  # Do not allow negative bin numbers
1730  if not n_bins > 0:
1731  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1732  raise ValueError(message)
1733 
1734  # Coerce values to a numpy array. Do not copy if already a numpy array.
1735  xs = np.array(xs, copy=False)
1736 
1737  if self.is_binary(xs) or (allow_discrete and self.is_discrete(xs)):
1738  # This covers also the case
1739  debug('Discrete binning values encountered')
1740  finite_xs = xs[np.isfinite(xs)]
1741  unique_xs = np.unique(finite_xs)
1742 
1743  # Crop the unique values between the lower and upper bound
1744  if lower_bound is None:
1745  if len(unique_xs) == 0:
1746  if upper_bound is None:
1747  lower_bound = 0
1748  else:
1749  lower_bound = upper_bound - 1
1750  else:
1751  lower_bound = np.min(unique_xs)
1752  else:
1753  unique_xs = unique_xs[unique_xs >= lower_bound]
1754 
1755  if upper_bound is None:
1756  if len(unique_xs) == 0:
1757  upper_bound = lower_bound + 1
1758  else:
1759  upper_bound = np.min(unique_xs)
1760  else:
1761  unique_xs = unique_xs[unique_xs <= upper_bound]
1762 
1763  if n_bins is None:
1764  n_bins = len(unique_xs) or 1
1765 
1766  if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1767  # Construct a float array forwardable to root.
1768  bin_edges = array.array('d', unique_xs)
1769  format_bin_label = self.format_bin_label
1770  bin_labels = [format_bin_label(value) for value in bin_edges]
1771  bin_edges.append(bin_edges[-1] + 1)
1772  return bin_edges, bin_labels
1773 
1774  else:
1775  # Ambiguous case what to do in case of a number of requested bins
1776  # that is lower than the number of unique values?
1777 
1778  # Continue with an equistant binning for now.
1779  pass
1780 
1781  debug('Lower bound %s', lower_bound)
1782  debug('Upper bound %s', upper_bound)
1783  debug('N bins %s', n_bins)
1784 
1785  else:
1786  bin_range = self.determine_bin_range(xs,
1787  stackbys=stackbys,
1788  n_bins=n_bins,
1789  lower_bound=lower_bound,
1790  upper_bound=upper_bound,
1791  outlier_z_score=outlier_z_score,
1792  include_exceptionals=include_exceptionals)
1793 
1794  n_bins, lower_bound, upper_bound = bin_range
1795 
1796  n_bin_edges = n_bins + 1
1797  if lower_bound != upper_bound:
1798  if bins == "flat":
1799  debug("Creating flat distribution binning")
1800  precentiles = np.linspace(0.0, 100.0, n_bin_edges)
1801  bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], precentiles))
1802  else:
1803  # Correct the upper bound such that all values are strictly smaller than the upper bound
1804  # Make one step in single precision in the positive direction
1805  bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1806 
1807  # Reinforce the upper and lower bound to be exact
1808  # Also expand the upper bound by an epsilon
1809  # to prevent the highest value in xs from going in the overflow bin
1810  bin_edges[0] = lower_bound
1811  bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1812  debug('Bins %s', bin_edges)
1813 
1814  else:
1815  # Fall back if the array contains only one value
1816  bin_edges = [lower_bound, upper_bound + 1]
1817 
1818  # Construct a float array forwardable to root.
1819  bin_edges = array.array('d', bin_edges)
1820  debug('Bins %s for %s', bin_edges, self.name)
1821  return bin_edges, None
1822 
1824  xs,
1825  stackbys=None,
1826  n_bins=None,
1827  lower_bound=None,
1828  upper_bound=None,
1829  outlier_z_score=None,
1830  include_exceptionals=True):
1831  """Calculates the number of bins, the lower bound and the upper bound from a given data series
1832  estimating the values that are not given.
1833 
1834  If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1835  The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1836  mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1837  quantile range (IQR).
1838 
1839  If additional include_exceptionals is true the method tries to find exceptional values in the series
1840  and always include them in the range if it finds any.
1841  Exceptional values means exact values that appear often in the series for whatever reason.
1842  Possible reasons include
1843  * Interal / default values
1844  * Failed evaluation conditions
1845  * etc.
1846  which should be not cropped away automatically if you are locking on the quality of your data.
1847 
1848  Parameters
1849  ----------
1850  xs : numpy.ndarray (1d)
1851  Data point for which a binning should be found.
1852  stackbys : numpy.ndarray (1d)
1853  Categories of the data points to be distinguishable
1854  n_bins : int or None, optional
1855  Preset number of desired bins. The default, None, means the bound should be extracted from data.
1856  The rice rule is used the determine the number of bins.
1857  lower_bound : float or None, optional
1858  Preset lower bound of the binning range.
1859  The default, None, means the bound should be extracted from data.
1860  upper_bound : float or None, optional
1861  Preset upper bound of the binning range.
1862  The default, None, means the bound should be extracted from data.
1863  outlier_z_score : float or None, optional
1864  Threshold z-score of outlier detection.
1865  The default, None, means no outlier detection.
1866  include_exceptionals : bool, optional
1867  If the outlier detection is active this switch indicates,
1868  if values detected as exceptionally frequent shall be included
1869  nevertheless into the binning range. Default is True,
1870  which means exceptionally frequent values as included
1871  even if they are detected as outliers.
1872 
1873  Returns
1874  -------
1875  n_bins, lower_bound, upper_bound : int, float, float
1876  A triple of found number of bins, lower bound and upper bound of the binning range.
1877  """
1878 
1879  if stackbys is not None:
1880  unique_stackbys = np.unique(stackbys)
1881  stack_ranges = []
1882  for value in unique_stackbys:
1883  if np.isnan(value):
1884  indices_for_value = np.isnan(stackbys)
1885  else:
1886  indices_for_value = stackbys == value
1887 
1888  stack_lower_bound, stack_upper_bound = \
1889  self.determine_range(xs[indices_for_value],
1890  lower_bound=lower_bound,
1891  upper_bound=upper_bound,
1892  outlier_z_score=outlier_z_score,
1893  include_exceptionals=include_exceptionals)
1894 
1895  stack_ranges.append([stack_lower_bound, stack_upper_bound])
1896 
1897  lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1898  upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1899 
1900  else:
1901  lower_bound, upper_bound = self.determine_range(xs,
1902  lower_bound=lower_bound,
1903  upper_bound=upper_bound,
1904  outlier_z_score=outlier_z_score,
1905  include_exceptionals=include_exceptionals)
1906 
1907  if n_bins is None:
1908  # Assume number of bins according to the rice rule.
1909  # The number of data points should not include outliers.
1910  n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1911  rice_n_bins = int(statistics.rice_n_bin(n_data))
1912  n_bins = rice_n_bins
1913 
1914  else:
1915  n_bins = int(n_bins)
1916  # Do not allow negative bin numbers
1917  if not n_bins > 0:
1918  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1919  raise ValueError(message)
1920 
1921  return n_bins, lower_bound, upper_bound
1922 
1924  xs,
1925  lower_bound=None,
1926  upper_bound=None,
1927  outlier_z_score=None,
1928  include_exceptionals=True):
1929  """
1930  Parameters
1931  ----------
1932  xs : numpy.ndarray (1d)
1933  Data point for which a binning should be found.
1934  lower_bound : float or None, optional
1935  Preset lower bound of the binning range.
1936  The default, None, means the bound should be extracted from data.
1937  upper_bound : float or None, optional
1938  Preset upper bound of the binning range.
1939  The default, None, means the bound should be extracted from data.
1940  outlier_z_score : float or None, optional
1941  Threshold z-score of outlier detection.
1942  The default, None, means no outlier detection.
1943  include_exceptionals : bool, optional
1944  If the outlier detection is active this switch indicates,
1945  if values detected as exceptionally frequent shall be included
1946  nevertheless into the binning range. Default is True,
1947  which means exceptionally frequent values as included
1948  even if they are detected as outliers.
1949 
1950  Returns
1951  -------
1952  lower_bound, upper_bound : float, float
1953  A pair of found lower bound and upper bound of series.
1954  """
1955  debug = get_logger().debug
1956 
1957  finite_xs_indices = np.isfinite(xs)
1958  if np.any(finite_xs_indices):
1959  finite_xs = xs[finite_xs_indices]
1960  else:
1961  finite_xs = xs
1962 
1963  make_symmetric = False
1964  exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
1965 
1966  # Look for exceptionally frequent values in the series, e.g. interal delta values like -999
1967  if include_exceptionals or exclude_outliers:
1968  exceptional_xs = self.get_exceptional_values(finite_xs)
1969  exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1970 
1971  # Prepare for the estimation of outliers
1972  if exclude_outliers:
1973  if not np.all(exceptional_indices):
1974  # Exclude excecptional values from the estimation to be unbiased
1975  # even in case exceptional values fall into the central region near the mean
1976  x_mean, x_std = self.get_robust_mean_and_std(finite_xs[~exceptional_indices])
1977  else:
1978  x_mean, x_std = np.nan, np.nan
1979 
1980  make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
1981 
1982  if include_exceptionals and len(exceptional_xs) != 0:
1983  lower_exceptional_x = np.min(exceptional_xs)
1984  upper_exceptional_x = np.max(exceptional_xs)
1985  make_symmetric = False
1986  else:
1987  lower_exceptional_x = np.nan
1988  upper_exceptional_x = np.nan
1989 
1990  # Find the lower bound, if it is not given.
1991  if lower_bound is None:
1992  try:
1993  lower_bound = np.min(finite_xs)
1994  except ValueError:
1995  lower_bound = -999
1996  # Clip the lower bound by outliers that exceed the given z score
1997  if outlier_z_score is not None:
1998  # The lower bound at which outliers exceed the given z score
1999  lower_outlier_bound = x_mean - outlier_z_score * x_std
2000 
2001  # Clip the lower bound such that it concides with an actual value,
2002  # which prevents empty bins from being produced
2003  indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2004 
2005  if np.any(indices_above_lower_outlier_bound):
2006  lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2007 
2008  # However we want to include at least the exceptional values in the range if there are any.
2009  lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2010 
2011  debug('Lower bound after outlier detection')
2012  debug('Lower bound %s', lower_bound)
2013  debug('Lower outlier bound %s', lower_outlier_bound)
2014 
2015  # Find the upper bound, if it is not given
2016  if upper_bound is None:
2017  try:
2018  upper_bound = np.max(finite_xs)
2019  except ValueError:
2020  upper_bound = 999
2021  if outlier_z_score is not None:
2022  # The upper bound at which outliers exceed the given z score
2023  upper_outlier_bound = x_mean + outlier_z_score * x_std
2024 
2025  # Clip the upper bound such that it concides with an actual value,
2026  # which prevents empty bins from being produced
2027  indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2028 
2029  if np.any(indices_below_upper_outlier_bound):
2030  upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2031 
2032  # However we want to include at least the exceptional values in the range if there are any.
2033  upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2034 
2035  debug('Upper bound after outlier detection')
2036  debug('Upper bound %s', upper_bound)
2037  debug('Upper outlier bound %s', upper_outlier_bound)
2038 
2039  if make_symmetric and lower_bound < 0 and upper_bound > 0:
2040  if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2041  abs_bound = max(abs(lower_bound), abs(upper_bound))
2042  lower_bound = -abs_bound
2043  upper_bound = abs_bound
2044 
2045  return lower_bound, upper_bound
2046 
2047  @classmethod
2048  def set_additional_stats_tf1(cls, histogram):
2049  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2050 
2051  Parameters
2052  ----------
2053  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2054  Something having a GetListOfFunctions method that should hold
2055  the combined fit and additional statistics function.
2056  """
2057  additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2058  cls.set_tf1(histogram, additional_stats_tf1)
2059 
2060  @classmethod
2061  def set_fit_tf1(cls, histogram, fit_tf1):
2062  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2063 
2064  Parameters
2065  ----------
2066  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2067  Something having a GetListOfFunctions method that should hold
2068  the combined fit and additional statistics function.
2069  """
2070  additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2071  combined_tf1 = cls.combine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2072  cls.set_tf1(histogram, combined_tf1)
2073 
2074  @classmethod
2075  def set_tf1(cls, histogram, tf1):
2076  """Set the attached TF1 of the histogram.
2077 
2078  Parameters
2079  ----------
2080  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2081  Something having a GetListOfFunctions method that should hold
2082  the combined fit and additional statistics function.
2083  """
2084  # Delete any functions formally added
2085  cls.delete_tf1(histogram)
2086  if tf1 is not None:
2087  tf1.SetName("FitAndStats")
2088  histogram.GetListOfFunctions().Add(tf1)
2089 
2090  @classmethod
2091  def delete_tf1(cls, histogram):
2092  """Delete the attached TF1 from the histogram
2093 
2094  Parameters
2095  ----------
2096  histogram : ROOT.TH1 or ROOT.TGraph
2097  Something having a GetListOfFunctions method that holds the fit function
2098  """
2099  tf1 = histogram.FindObject("FitAndStats")
2100  if tf1:
2101  function_list = histogram.GetListOfFunctions()
2102  function_list.Remove(tf1)
2103 
2104  @classmethod
2105  def create_additional_stats_tf1(cls, histogram):
2106  """Create a TF1 with the additional statistics from the histogram as parameters.
2107 
2108  Parameters
2109  ----------
2110  histogram : ROOT.TH1 or ROOT.TGraph
2111  Something having a GetListOfFunctions method that holds the additional statistics.
2112 
2113  Returns
2114  -------
2115  ROOT.TF1
2116  Function with the additional statistics as parameters.
2117  """
2118 
2119  additional_stats = cls.get_additional_stats(histogram)
2120  if not additional_stats:
2121  return None
2122 
2123  # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2124  # Dummy range to serve the functions
2125  lower_bound = 0
2126  upper_bound = 0
2127 
2128  # Create a formula which is zero in all cases but has space for n parameters
2129  # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2130  formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2131 
2132  # Compose a function that carries the addtional information
2133  additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2134 
2135  for (i, (label, value)) in enumerate(additional_stats.items()):
2136  # root 6 does not like labels with spaces ..
2137  # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2138  # therefor this workaround:
2139  label = label.replace(" ", "-")
2140  additional_stats_tf1.SetParName(i, label)
2141  additional_stats_tf1.FixParameter(i, value)
2142 
2143  return additional_stats_tf1
2144 
2145  @classmethod
2146  def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2147  """Combine the fit function and the function carrying the additional statistics to one function.
2148 
2149  Parameters
2150  ----------
2151  fit_tf1 : ROOT.TF1
2152  The fit function
2153  additional_stats_tf1 : ROOT.TF1
2154  The function carrying the additional statistics as parameters
2155 
2156  Returns
2157  -------
2158  ROOT.TF1
2159  """
2160  if additional_stats_tf1 is None:
2161  return fit_tf1
2162 
2163  # Combine both TF1 functions
2164  # Get the lower and upper bound of the fit
2165  # Use the pass-by reference containers from pyROOT to be able to call the function
2166  lower_bound = ROOT.Double()
2167  upper_bound = ROOT.Double()
2168  fit_tf1.GetRange(lower_bound, upper_bound)
2169  title = fit_tf1.GetTitle()
2170 
2171  combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2172  combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound, upper_bound)
2173  combined_tf1.SetTitle(title)
2174 
2175  # Transfer the fitted parameters
2176  chi2 = fit_tf1.GetChisquare()
2177  combined_tf1.SetChisquare(chi2)
2178 
2179  ndf = fit_tf1.GetNDF()
2180  combined_tf1.SetNDF(ndf)
2181 
2182  n_stats_parameters = additional_stats_tf1.GetNpar()
2183  n_fit_parameters = fit_tf1.GetNpar()
2184  cls.copy_tf1_parameters(additional_stats_tf1, combined_tf1)
2185  cls.copy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2186 
2187  return combined_tf1
2188 
2189  @classmethod
2190  def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2191  """Copy the parameters of one TF1 to another.
2192 
2193  Parameters
2194  ----------
2195  tf1_source : ROOT.TF1
2196  Take parameters from here
2197  tf1_target : ROOT.TF1
2198  Copy them to here.
2199  offset : int, optional
2200  Index of the first target parameter to which to copy.
2201  """
2202  n_parameters = tf1_source.GetNpar()
2203 
2204  # Helper variables for pyROOT's mechanism to call functions by reference
2205  lower_bound = ROOT.Double()
2206  upper_bound = ROOT.Double()
2207 
2208  for i_source in range(n_parameters):
2209  parameter_name = tf1_source.GetParName(i_source)
2210  i_target = tf1_target.GetParNumber(parameter_name)
2211 
2212  # Workaround for a ROOT bug
2213  if i_target == -1:
2214  for i_target in range(target_tf1.GetNpar()):
2215  if parameter_name == target_tf1.GetParName(i_target):
2216  break
2217  else:
2218  i_target = -1
2219  continue
2220 
2221  tf1_target.SetParameter(i_target,
2222  tf1_source.GetParameter(i_source))
2223  tf1_target.SetParError(i_target,
2224  tf1_source.GetParError(i_source))
2225 
2226  tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2227  tf1_target.SetParLimits(i_target, lower_bound, upper_bound)
2228 
2230  """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2231  # Forward the attributes to the plot by auto assignment
2232 
2233  self.check = self.check
2234 
2235  self.contact = self.contact
2236 
2238 
2239 
2240  self.xlabel = self.xlabel
2241 
2242  self.ylabel = self.ylabel
2243 
2244  self.title = self.title
2245 
2246  def set_maximum(self, maximum):
2247  """Sets the maximum of the vertical plotable range"""
2248  for histogram in self.histograms:
2249  if isinstance(histogram, ROOT.TH1):
2250  histogram.SetMaximum(histogram.GetMaximum(maximum))
2251  else:
2252  histogram.SetMaximum(maximum)
2253 
2254  def set_minimum(self, minimum):
2255  """Sets the minimum of the vertical plotable range"""
2256  for histogram in self.histograms:
2257  if isinstance(histogram, ROOT.TH1):
2258  histogram.SetMinimum(histogram.GetMinimum(minimum))
2259  else:
2260  histogram.SetMinimum(minimum)
2261 
2262  @classmethod
2263  def set_tstyle(cls):
2264  """Set the style such that the additional stats entries are shown by the TBrowser"""
2265  belle2_validation_style_name = "belle2_validation_style"
2266  belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2267  if not belle2_validation_tstyle:
2268  belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2269 
2270  opt_fit = 112
2271  belle2_validation_tstyle.SetOptFit(opt_fit)
2272 
2273  opt_stat = 111111
2274  belle2_validation_tstyle.SetOptStat(opt_stat)
2275  ROOT.gROOT.SetStyle(belle2_validation_style_name)
2276 
2277  # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2278 
2279  else:
2280  belle2_validation_tstyle.cd()
2281 
2282 
2283 def test():
2284  """Simple test methode"""
2285  ValidationPlot.set_tstyle()
2286 
2287  # Test a histogram plot with some nan and inf values
2288  normal_distributed_values = np.random.randn(1000)
2289 
2290  for i in range(10):
2291  normal_distributed_values[i] = np.nan
2292 
2293  for i in range(10, 20):
2294  normal_distributed_values[i] = np.inf
2295 
2296  for i in range(20, 30):
2297  normal_distributed_values[i] = -np.inf
2298 
2299  validation_histogram = ValidationPlot('test_hist')
2300  validation_histogram.hist(normal_distributed_values)
2301  validation_histogram.title = 'A normal distribution'
2302  validation_histogram.xlabel = 'normal'
2303  validation_histogram.ylabel = 'frequency'
2304  validation_histogram.fit_gaus()
2305 
2306  # Test for a cumulated histogram - cumulation from left to right
2307  cumulated_histogram = ValidationPlot('test_cum_hist')
2308  cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2309  cumulated_histogram.title = 'A cumulated normal distribution'
2310  cumulated_histogram.xlabel = 'normal'
2311  cumulated_histogram.ylabel = 'cdf'
2312  cumulated_histogram.show()
2313 
2314  # Test stacked histogram
2315  # Make a random selection of 40%
2316  stackby = np.random.binomial(1.0, 0.40, 1000)
2317  stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2318  stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2319 
2320  # Make a scatterplot with two species.
2321  x = np.random.randn(1000)
2322  y = 3 * np.random.randn(1000)
2323  ones = np.ones_like(x)
2324  angle = np.pi / 2
2325 
2326  x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2327  y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2328 
2329  stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2330  stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2331 
2332  # Make a profile plot with the two species
2333  stacked_validation_profile = ValidationPlot('test_stacked_profile')
2334  stacked_validation_profile.profile(x1, y1, stackby=stackby)
2335 
2336  # Make a two dimensional histogram with two species
2337  stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2338  stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2339 
2340  # Test a profile with a diagonal fit
2341  x = np.linspace(-1, 1, 1000)
2342  y = x.copy()
2343  x[0] = np.nan
2344  diagonal_plot = ValidationPlot('test_diag')
2345  diagonal_plot.profile(x, y, bins=50)
2346  diagonal_plot.fit_line()
2347 
2348  # Test if cumulation works for profile plots
2349  cumulated_profile = ValidationPlot('test_cum_profile')
2350  cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2351 
2352  tfile = ROOT.TFile('test.root', 'RECREATE')
2353 
2354  validation_histogram.write(tfile)
2355 
2356  with root_cd("expert") as tdirectory1:
2357  diagonal_plot.write(tdirectory1)
2358  cumulated_profile.write(tdirectory1)
2359  cumulated_histogram.write(tdirectory1)
2360 
2361  with root_cd("stacked") as tdirectory2:
2362  stacked_validation_histogram.write(tdirectory2)
2363  stacked_validation_scatter.write()
2364  stacked_validation_profile.write()
2365  stacked_validation_hist2d.write()
2366 
2367  tfile.Close()
2368 
2369  tfile = ROOT.TFile('test.root')
2370  tBrowser = ROOT.TBrowser()
2371  tBrowser.BrowseObject(tfile)
2372  input()
2373  tfile.Close()
2374 
2375 
2376 if __name__ == '__main__':
2377  test()
tracking.validation.plot.ValidationPlot.y_log
y_log
Indicator whether the y axes should be displayed as a log scale.
Definition: plot.py:231
tracking.validation.plot.ValidationPlot.convert_tprofile_to_tgrapherrors
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
Definition: plot.py:1113
tracking.validation.plot.ValidationPlot.create_1d
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
Definition: plot.py:972
tracking.validation.plot.ValidationPlot.very_sparse_dots_line_style_index
int very_sparse_dots_line_style_index
A an index that reference to a dot spacing such that the line is almost invisible for scatter.
Definition: plot.py:174
tracking.validation.plot.ValidationPlot.determine_bin_range
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1823
tracking.validation.plot.ValidationPlot.create_stack
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
Definition: plot.py:1081
tracking.validation.plot.ValidationPlot.set_minimum
def set_minimum(self, minimum)
Definition: plot.py:2254
tracking.validation.plot.ValidationPlot.create
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
Definition: plot.py:1038
tracking.validation.plot.ValidationPlot.create_additional_stats_tf1
def create_additional_stats_tf1(cls, histogram)
Definition: plot.py:2105
tracking.validation.plot.ValidationPlot.add_stats_entry
def add_stats_entry(cls, histogram, label, value)
Definition: plot.py:1459
tracking.validation.plot.ValidationPlot.copy_tf1_parameters
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
Definition: plot.py:2190
tracking.validation.plot.ValidationPlot.lower_bound
lower_bound
lower left corner of the histogram
Definition: plot.py:520
tracking.validation.plot.ValidationPlot._contact
_contact
Contact email address for display on the validation page.
Definition: plot.py:204
tracking.validation.plot.ValidationPlot.is_discrete
def is_discrete(xs, max_n_unique=None)
Definition: plot.py:921
tracking.validation.plot.ValidationPlot.__init__
def __init__(self, name, referenceFileName=None)
Definition: plot.py:177
tracking.validation.plot.ValidationPlot.check
check
cached value of the user-check action for this plot
Definition: plot.py:2233
tracking.validation.plot.ValidationPlot.cumulate
def cumulate(cls, histogram, cumulation_direction=None)
Definition: plot.py:1565
tracking.validation.plot.ValidationPlot.pvalue_warn
pvalue_warn
custom levels for pvalue warnings
Definition: plot.py:225
tracking.validation.plot.ValidationPlot.unpack_2d_param
def unpack_2d_param(param)
Definition: plot.py:887
tracking.validation.plot.ValidationPlot.set_color
def set_color(self, tobject, root_i_color)
Definition: plot.py:1222
tracking.validation.plot.ValidationPlot.fill_into_grouped
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
Definition: plot.py:1184
tracking.validation.plot.ValidationPlot.referenceFileName
referenceFileName
name of the reference file, if not None the binning will be read from there
Definition: plot.py:195
tracking.validation.plot.ValidationPlot.fit_gaus
def fit_gaus(self, z_score=None)
Definition: plot.py:608
tracking.validation.plot.ValidationPlot._xlabel
_xlabel
X axes label of the validation plot.
Definition: plot.py:207
tracking.validation.plot.ValidationPlot.get_additional_stats
def get_additional_stats(cls, histogram)
Definition: plot.py:1476
tracking.validation.plot.ValidationPlot.fit_line
def fit_line(self)
Definition: plot.py:647
tracking.validation.plot.ValidationPlot.set_maximum
def set_maximum(self, maximum)
Definition: plot.py:2246
tracking.validation.plot.ValidationPlot.title
title
cached value of the title for this plot
Definition: plot.py:2244
tracking.validation.plot.ValidationPlot.histograms
histograms
A list of the histograms that make up the plot.
Definition: plot.py:219
tracking.validation.plot.ValidationPlot._check
_check
Detailed check instructions for display on the validation page.
Definition: plot.py:201
tracking.validation.plot.ValidationPlot.is_expert
def is_expert(self)
Definition: plot.py:790
tracking.validation.plot.ValidationPlot.determine_range
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1923
tracking.validation.plot.ValidationPlot.hist
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
Definition: plot.py:233
tracking.validation.plot.ValidationPlot.set_tstyle
def set_tstyle(cls)
Definition: plot.py:2263
tracking.validation.plot.ValidationPlot.fit_diag
def fit_diag(self)
Definition: plot.py:666
tracking.validation.plot.ValidationPlot.fill_into_tgraph
def fill_into_tgraph(self, graph, xs, ys, filter=None)
Definition: plot.py:1266
tracking.validation.plot.ValidationPlot._ylabel
_ylabel
Y axes label of the validation plot.
Definition: plot.py:210
tracking.validation.plot.ValidationPlot.set_tf1
def set_tf1(cls, histogram, tf1)
Definition: plot.py:2075
tracking.root_utils
Definition: root_utils.py:1
tracking.validation.plot.ValidationPlot.delete_tf1
def delete_tf1(cls, histogram)
Definition: plot.py:2091
tracking.validation.plot.ValidationPlot.is_binary
def is_binary(xs)
Definition: plot.py:916
tracking.validation.plot.ValidationPlot.contact
contact
contact information for this plot
Definition: plot.py:2235
tracking.validation.plot.ValidationPlot.grapherrors
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:417
tracking.validation.plot.ValidationPlot.set_fit_tf1
def set_fit_tf1(cls, histogram, fit_tf1)
Definition: plot.py:2061
tracking.validation.plot.ValidationPlot.profile
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True)
Definition: plot.py:276
tracking.validation.plot.ValidationPlot.scatter
def scatter(self, xs, ys, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:358
tracking.validation.plot.ValidationPlot.determine_bin_edges
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
Definition: plot.py:1663
tracking.validation.plot.ValidationPlot.fill_into_tgrapherror
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
Definition: plot.py:1255
tracking.validation.plot.ValidationPlot.fit
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
Definition: plot.py:675
Belle2::slice
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:89
tracking.validation.plot.ValidationPlot.plot
plot
The main plot object, may contain one or more (in case of stacked pltos) histograms.
Definition: plot.py:216
tracking.validation.plot.ValidationPlot.fill_into_th1
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1350
tracking.validation.plot.ValidationPlot
Definition: plot.py:152
tracking.validation.plot.ValidationPlot.show
def show(self)
Definition: plot.py:740
tracking.validation.plot.ValidationPlot.combine_fit_and_additional_stats
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
Definition: plot.py:2146
tracking.validation.plot.ValidationPlot.format_bin_label
def format_bin_label(value)
Definition: plot.py:960
tracking.validation.plot.ValidationPlot.xlabel
xlabel
cached value of the x-axis label for this plot
Definition: plot.py:2240
tracking.validation.plot.ValidationPlot.description
description
description of the plot
Definition: plot.py:2237
tracking.validation.plot.ValidationPlot._title
_title
Title of the validation plot.
Definition: plot.py:213
tracking.validation.plot.ValidationPlot.name
name
A unique name to be used as the name of the ROOT object to be generated.
Definition: plot.py:192
tracking.validation.plot.ValidationPlot.set_additional_stats_tf1
def set_additional_stats_tf1(cls, histogram)
Definition: plot.py:2048
tracking.validation.plot.ValidationPlot.attach_attributes
def attach_attributes(self)
Definition: plot.py:2229
tracking.validation.plot.ValidationPlot.add_nan_inf_stats
def add_nan_inf_stats(cls, histogram, name, xs)
Definition: plot.py:1433
tracking.validation.plot.ValidationPlot.get_exceptional_values
def get_exceptional_values(xs)
Definition: plot.py:926
tracking.validation.plot.ValidationPlot.write
def write(self, tdirectory=None)
Definition: plot.py:747
tracking.validation.plot.ValidationPlot.fill_into
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1241
tracking.validation.plot.ValidationPlot._description
_description
Description of the plot purpose for display on the validation page.
Definition: plot.py:198
tracking.validation.plot.ValidationPlot.pvalue_error
pvalue_error
custom levels for pvalue errors
Definition: plot.py:228
tracking.validation.plot.ValidationPlot.gaus_slice_fit
def gaus_slice_fit(cls, th2, name, z_score=None)
Definition: plot.py:1499
tracking.validation.plot.ValidationPlot.ylabel
ylabel
default label for the histogram's Y axis
Definition: plot.py:261
tracking.validation.plot.ValidationPlot.upper_bound
upper_bound
upper right corner of the hisogram
Definition: plot.py:522
tracking.validation.plot.ValidationPlot._is_expert
_is_expert
per default all plots are expert and must be set to non-expert explicitly
Definition: plot.py:222
tracking.validation.plot.ValidationPlot.fit_const
def fit_const(self)
Definition: plot.py:657
tracking.validation.plot.ValidationPlot.hist2d
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
Definition: plot.py:478
tracking.validation.plot.ValidationPlot.get_robust_mean_and_std
def get_robust_mean_and_std(xs)
Definition: plot.py:942