Belle II Software  release-06-02-00
plot.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import sys
13 import math
14 import collections
15 import array
16 import numpy as np
17 import ctypes
18 
19 import ROOT
20 
21 import basf2
22 
23 from tracking.root_utils import root_cd, root_save_name
24 
25 from tracking.validation import statistics
26 
27 import logging
28 
29 
30 def get_logger():
31  """Get for the logging.Logger instance of this module
32 
33  Returns
34  -------
35  logging.Logger
36  Logger instance of this module
37  """
38  return logging.getLogger(__name__)
39 
40 
41 
42 units_by_quantity_name = {
43  'x': 'cm',
44  'y': 'cm',
45  'z': 'cm',
46  'E': 'GeV',
47  'px': 'GeV',
48  'p_{x}': 'GeV',
49  'py': 'GeV',
50  'p_{y}': 'GeV',
51  'pz': 'GeV',
52  'p_{z}': 'GeV',
53  'pt': 'GeV',
54  'p_{t}': 'GeV',
55  'd0': 'cm',
56  'd_{0}': 'cm',
57  'phi0': None,
58  '#phi_{0}': None,
59  'omega': '1/cm',
60  '#omega': '1/cm',
61  'z0': 'cm',
62  'z_{0}': 'cm',
63  'tan_lambda': None,
64  'tan #lambda': None}
65 
66 
67 def get_unit(quantity_name):
68  """Infers the unit of a quantity from its name.
69 
70  Assumes the standard Belle II unit system.
71 
72  Currently looks up the quantity string from units_by_quantity_name.
73 
74  Parameters
75  ----------
76  quantity_name : str
77  Name of a quantity (E.g. pt, x, ...)
78 
79  Returns
80  -------
81  str
82  """
83 
84  unit = units_by_quantity_name.get(quantity_name, None)
85  return unit
86 
87 
88 def compose_axis_label(quantity_name, unit=None):
89  """Formats a quantity name and a unit to a label for a plot axes.
90 
91  If the unit is not given to is tried to infer it
92  from the quantity name by the get_unit function.
93 
94  Parameters
95  ----------
96  quantity_name : str
97  Name of the quantity to be displayed at the axes
98  unit : str, optional
99  The unit of the quantity. Defaults to get_unit(quantity_name)
100 
101  Returns
102  -------
103  str
104  """
105 
106  if unit is None:
107  unit = get_unit(quantity_name)
108 
109  if unit is None:
110  axis_label = quantity_name
111  else:
112  axis_label = '%s (%s)' % (quantity_name, unit)
113 
114  return axis_label
115 
116 
117 def get1DBinningFromReference(name, refFileName):
118  """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
119 
120  @param name : name of the TH1 object to be looked for in the file
121  @param refFileName : name of the reference file where the object is searched for
122 
123  @return int nbin, float xmin, float xmax of the TH1
124  """
125 
126  nbins = None
127  x_min = None
128  x_max = None
129 
130  if refFileName is None or refFileName == "":
131  return nbins, x_min, x_max
132 
133  # store current directory to not confuse directories by opening a TFile
134  oldDirectory = ROOT.gROOT.CurrentDirectory().load()
135 
136  tfile = ROOT.TFile(refFileName)
137  if tfile.IsOpen():
138  objptr = tfile.Get(name)
139  if objptr and objptr.InheritsFrom("TH1"):
140  nbins = objptr.GetNbinsX()
141  x_min = objptr.GetXaxis().GetXmin()
142  x_max = objptr.GetXaxis().GetXmax()
143  else:
144  basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
145  else:
146  basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
147 
148  tfile.Close()
149 
150  # set current directory back to original one
151  oldDirectory.cd()
152 
153  return nbins, x_min, x_max
154 
155 
156 
157 StatsEntry = ROOT.TParameter(float)
158 
159 
160 class ValidationPlot(object):
161 
162  """Class for generating a validation plot for the Belle II validation page.
163 
164  Typically it generates plots from values stored in numpy arrays and feeds them into
165  plot ROOT classes for storing them.
166 
167  It implements an automatic binning procedure based on the rice rule and
168  robust z score outlier detection.
169 
170  It also keeps track of additional statistics typically neglected by ROOT such as a count
171  for the non finit values such as NaN, +Inf, -Inf.
172 
173  The special attributes for the Belle II validation page like
174  * title
175  * contract
176  * description
177  * check
178  are exposed as properties of this class.
179  """
180 
181 
182  very_sparse_dots_line_style_index = 28
183  # This line style is defined in the set_tstyle method below.
184 
185  def __init__(self, name, referenceFileName=None):
186  """Constructor of the ValidationPlot
187 
188  Parameters
189  ----------
190  name : str
191  A unique name to be used as the name of the ROOT object to be generated
192 
193  referenceFileName : str
194  name of a reference file. If set the code will try to get the histogram or profile
195  from that file and determine the number of bins and upper and lower bound
196  (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
197  """
198 
199 
200  self.namename = root_save_name(name)
201 
202 
203  self.referenceFileNamereferenceFileName = referenceFileName
204 
205 
206  self._description_description = ''
207 
208 
209  self._check_check = ''
210 
211 
212  self._contact_contact = ''
213 
214 
215  self._xlabel_xlabel = ''
216 
217 
218  self._ylabel_ylabel = ''
219 
220 
221  self._title_title = ''
222 
223 
224  self.plotplot = None
225 
226 
227  self.histogramshistograms = []
228 
229 
230  self._is_expert_is_expert = True
231 
232 
233  self.pvalue_warnpvalue_warn = None
234 
235 
236  self.pvalue_errorpvalue_error = None
237 
238 
239  self.y_logy_log = False
240 
241  def hist(self,
242  xs,
243  weights=None,
244  stackby=None,
245  bins=None,
246  lower_bound=None,
247  upper_bound=None,
248  outlier_z_score=None,
249  include_exceptionals=True,
250  allow_discrete=False,
251  cumulation_direction=None,
252  is_expert=True):
253  """Fill the plot with a one dimensional histogram."""
254 
255  # if referenceFileName was set the binning will taken from there
256  if self.referenceFileNamereferenceFileName is not None:
257  n, xmin, xmax = get1DBinningFromReference(self.namename, self.referenceFileNamereferenceFileName)
258  if n is not None and xmin is not None and xmax is not None:
259  bins = n
260  upper_bound = xmax
261  lower_bound = xmin
262 
263  th1_factory = ROOT.TH1D
264  self._is_expert_is_expert = is_expert
265 
266  self.create_1dcreate_1d(th1_factory,
267  xs,
268  weights=weights,
269  stackby=stackby,
270  bins=bins,
271  lower_bound=lower_bound,
272  upper_bound=upper_bound,
273  outlier_z_score=outlier_z_score,
274  include_exceptionals=include_exceptionals,
275  allow_discrete=allow_discrete,
276  cumulation_direction=cumulation_direction)
277 
278  if not self.ylabelylabelylabelylabel:
279 
280  self.ylabelylabelylabelylabel = 'count'
281 
282  return self
283 
284  def profile(self,
285  xs,
286  ys,
287  weights=None,
288  stackby=None,
289  bins=None,
290  lower_bound=None,
291  upper_bound=None,
292  y_binary=None,
293  y_log=None,
294  outlier_z_score=None,
295  include_exceptionals=True,
296  allow_discrete=False,
297  cumulation_direction=None,
298  gaus_z_score=None,
299  is_expert=True):
300  """Fill the plot with a one dimensional profile of one variable over another."""
301 
302  # if referenceFileName was set the binning will taken from there
303  if self.referenceFileNamereferenceFileName is not None:
304  n = None
305  xmin = None
306  xmax = None
307  n, xmin, xmax = get1DBinningFromReference(self.namename, self.referenceFileNamereferenceFileName)
308  if n is not None and xmin is not None and xmax is not None:
309  bins = n
310  upper_bound = xmax
311  lower_bound = xmin
312 
313  th1_factory = ROOT.TProfile
314  self._is_expert_is_expert = is_expert
315  if gaus_z_score is None:
316  self.create_1dcreate_1d(th1_factory,
317  xs,
318  ys,
319  weights=weights,
320  stackby=stackby,
321  bins=bins,
322  lower_bound=lower_bound,
323  upper_bound=upper_bound,
324  outlier_z_score=outlier_z_score,
325  include_exceptionals=include_exceptionals,
326  allow_discrete=allow_discrete,
327  cumulation_direction=cumulation_direction)
328  else:
329  # Introduce a dummy name for the temporary two dimensional histogram
330  self.namename = "_" + self.namename
331  self.hist2dhist2d(xs, ys=ys, weights=weights, stackby=stackby,
332  bins=(bins, None),
333  lower_bound=(lower_bound, None),
334  upper_bound=(upper_bound, None),
335  outlier_z_score=(outlier_z_score, outlier_z_score),
336  include_exceptionals=(include_exceptionals, True),
337  allow_discrete=(allow_discrete, False),
338  is_expert=is_expert)
339 
340  self.namename = self.namename[1:]
341  profiles = []
342  for histogram in self.histogramshistograms:
343  profile = self.gaus_slice_fitgaus_slice_fit(histogram,
344  name=histogram.GetName()[1:],
345  z_score=gaus_z_score)
346  profiles.append(profile)
347  self.histogramshistograms = profiles
348  self.plotplot = self.create_stackcreate_stack(profiles, name=self.plotplot.GetName()[1:], reverse_stack=False)
349 
350  if y_log:
351  self.y_logy_log = True
352 
353  if y_binary or self.is_binaryis_binary(ys):
354  if not self.ylabelylabelylabelylabel:
355  self.ylabelylabelylabelylabel = 'probability'
356 
357  for histogram in self.histogramshistograms:
358  histogram.SetMinimum(0)
359  histogram.SetMaximum(1.05)
360 
361  self.plotplot.SetMinimum(0)
362  self.plotplot.SetMaximum(1.05)
363 
364  return self
365 
366  def scatter(self,
367  xs,
368  ys,
369  stackby=None,
370  lower_bound=(None, None),
371  upper_bound=(None, None),
372  outlier_z_score=(None, None),
373  include_exceptionals=(True, True),
374  max_n_data=100000,
375  is_expert=True):
376  """Fill the plot with a (unbinned) two dimensional scatter plot"""
377 
378  self._is_expert_is_expert = is_expert
379 
380  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
381  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
382  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
383  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
384 
385  x_lower_bound, x_upper_bound = self.determine_rangedetermine_range(
386  xs,
387  lower_bound=x_lower_bound,
388  upper_bound=x_upper_bound,
389  outlier_z_score=x_outlier_z_score,
390  include_exceptionals=x_include_exceptionals
391  )
392 
393  y_lower_bound, y_upper_bound = self.determine_rangedetermine_range(
394  ys,
395  lower_bound=y_lower_bound,
396  upper_bound=y_upper_bound,
397  outlier_z_score=y_outlier_z_score,
398  include_exceptionals=y_include_exceptionals
399  )
400 
401  graph = ROOT.TGraph()
402 
403  graph.SetName(self.namename)
404  graph.SetMarkerStyle(6)
405  graph.GetHistogram().SetOption("AP") # <-- this has no effect?
406 
407  # Trying to make the graph lines invisible
408  color_index = 0 # white
409  # Transperent white
410  graph.SetLineColorAlpha(color_index, 0)
411  graph.SetLineStyle(self.very_sparse_dots_line_style_indexvery_sparse_dots_line_style_index)
412 
413  # Transport the lower and upper bound as ranges of the axis
414  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
415  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
416 
417  self.createcreate(graph,
418  xs=xs,
419  ys=ys,
420  stackby=stackby,
421  reverse_stack=False)
422 
423  return self
424 
425  def grapherrors(self,
426  xs_and_err,
427  ys_and_err,
428  stackby=None,
429  lower_bound=(None, None),
430  upper_bound=(None, None),
431  outlier_z_score=(None, None),
432  include_exceptionals=(True, True),
433  max_n_data=100000,
434  is_expert=True):
435  """Fill the plot with a (unbinned) two dimensional scatter plot
436  xs_and_err and ys_and_err are tuples containing the values and the errors on these values
437  as numpy arrays
438  """
439 
440  self._is_expert_is_expert = is_expert
441 
442  xs = xs_and_err[0]
443  ys = ys_and_err[0]
444 
445  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
446  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
447  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
448  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
449 
450  x_lower_bound, x_upper_bound = self.determine_rangedetermine_range(
451  xs,
452  lower_bound=x_lower_bound,
453  upper_bound=x_upper_bound,
454  outlier_z_score=x_outlier_z_score,
455  include_exceptionals=x_include_exceptionals
456  )
457 
458  y_lower_bound, y_upper_bound = self.determine_rangedetermine_range(
459  ys,
460  lower_bound=y_lower_bound,
461  upper_bound=y_upper_bound,
462  outlier_z_score=y_outlier_z_score,
463  include_exceptionals=y_include_exceptionals
464  )
465 
466  graph = ROOT.TGraphErrors()
467 
468  graph.SetName(self.namename)
469  graph.GetHistogram().SetOption("A")
470 
471  graph.SetMarkerColor(4)
472  graph.SetMarkerStyle(21)
473 
474  # Transport the lower and upper bound as ranges of the axis
475  graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
476  graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
477 
478  self.createcreate(graph,
479  xs=xs,
480  ys=ys,
481  stackby=stackby,
482  reverse_stack=False)
483 
484  return self
485 
486  def hist2d(self,
487  xs,
488  ys,
489  weights=None,
490  stackby=None,
491  bins=(None, None),
492  lower_bound=(None, None),
493  upper_bound=(None, None),
494  outlier_z_score=(None, None),
495  include_exceptionals=(True, True),
496  allow_discrete=(False, False),
497  quantiles=None,
498  is_expert=True):
499  """Fill the plot with a two dimensional histogram"""
500 
501  name = self.namename
502  # Introduce a dummy name for the temporary two dimensional histogram
503  if quantiles is not None:
504  name = "_" + self.namename
505 
506  x_bins, y_bins = self.unpack_2d_paramunpack_2d_param(bins)
507  x_lower_bound, y_lower_bound = self.unpack_2d_paramunpack_2d_param(lower_bound)
508  x_upper_bound, y_upper_bound = self.unpack_2d_paramunpack_2d_param(upper_bound)
509  x_outlier_z_score, y_outlier_z_score = self.unpack_2d_paramunpack_2d_param(outlier_z_score)
510  x_include_exceptionals, y_include_exceptionals = self.unpack_2d_paramunpack_2d_param(include_exceptionals)
511  x_allow_discrete, y_allow_discrete = self.unpack_2d_paramunpack_2d_param(allow_discrete)
512 
513  if quantiles is not None:
514  y_include_exceptionals = True
515  y_allow_discrete = False
516 
517  x_bin_edges, x_bin_labels = self.determine_bin_edgesdetermine_bin_edges(xs,
518  stackbys=stackby,
519  bins=x_bins,
520  lower_bound=x_lower_bound,
521  upper_bound=x_upper_bound,
522  outlier_z_score=x_outlier_z_score,
523  include_exceptionals=x_include_exceptionals,
524  allow_discrete=x_allow_discrete)
525 
526  y_bin_edges, y_bin_labels = self.determine_bin_edgesdetermine_bin_edges(ys,
527  stackbys=stackby,
528  bins=y_bins,
529  lower_bound=y_lower_bound,
530  upper_bound=y_upper_bound,
531  outlier_z_score=y_outlier_z_score,
532  include_exceptionals=y_include_exceptionals,
533  allow_discrete=y_allow_discrete)
534 
535  n_x_bins = len(x_bin_edges) - 1
536  n_y_bins = len(y_bin_edges) - 1
537 
538  self.lower_boundlower_bound = (x_bin_edges[0], y_bin_edges[0])
539 
540  self.upper_boundupper_bound = (x_bin_edges[-1], y_bin_edges[-1])
541 
542  histogram = ROOT.TH2D(name,
543  '',
544  n_x_bins,
545  x_bin_edges,
546  n_y_bins,
547  y_bin_edges)
548 
549  if x_bin_labels:
550  get_logger().info("Scatter plot %s is discrete in x.", name)
551  x_taxis = histogram.GetXaxis()
552  for i_x_bin, x_bin_label in enumerate(x_bin_labels):
553  x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
554  self.add_stats_entryadd_stats_entry(histogram, "dx", 1)
555 
556  else:
557  x_bin_width = x_bin_edges[1] - x_bin_edges[0]
558  self.add_stats_entryadd_stats_entry(histogram, "dx", x_bin_width)
559 
560  if y_bin_labels:
561  get_logger().info("Scatter plot %s is discrete in y.", name)
562  y_taxis = histogram.GetYaxis()
563  for i_y_bin, y_bin_label in enumerate(y_bin_labels):
564  y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
565  self.add_stats_entryadd_stats_entry(histogram, "dy", 1)
566 
567  else:
568  y_bin_width = y_bin_edges[1] - y_bin_edges[0]
569  self.add_stats_entryadd_stats_entry(histogram, "dy", y_bin_width)
570 
571  self.createcreate(histogram, xs, ys=ys, weights=weights, stackby=stackby)
572 
573  if quantiles is not None:
574  self.namename = self.namename[1:]
575  profiles = []
576  for histogram in self.histogramshistograms:
577  for quantile in quantiles:
578  profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
579 
580  # Manually copy labels grumble grumble
581  x_taxis = histogram.GetXaxis()
582  new_x_taxis = profile.GetXaxis()
583  for i_bin in range(x_taxis.GetNbins() + 2):
584  label = x_taxis.GetBinLabel(i_bin)
585  if label != "":
586  new_x_taxis.SetBinLabel(i_bin, label)
587 
588  # Remove faulty error values)
589  epsilon = sys.float_info.epsilon
590  for i_bin in range(0, profile.GetNbinsX() + 2):
591  profile.SetBinError(i_bin, epsilon)
592 
593  profiles.append(profile)
594 
595  self.histogramshistograms = profiles
596  self.plotplot = self.create_stackcreate_stack(profiles, name=self.plotplot.GetName()[1:], reverse_stack=False, force_graph=True)
597 
598  # Adjust the discrete bins after the filling to be equidistant
599  if x_bin_labels:
600  for histogram in self.histogramshistograms:
601  x_taxis = histogram.GetXaxis()
602  x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
603  x_taxis.Set(n_x_bins, x_bin_edges)
604 
605  # Adjust the discrete bins after the filling to be equidistant
606  if y_bin_labels:
607  for histogram in self.histogramshistograms:
608  x_taxis = histogram.GetXaxis()
609  y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
610  y_taxis.Set(n_y_bins, y_bin_edges)
611 
612  return self
613 
614  def fit_gaus(self, z_score=None):
615  """Fit a gaus belle curve to the central portion of a one dimensional histogram
616 
617  The fit is applied to the central mean +- z_score * std interval of the histogram,
618  such that it is less influence by non gaussian tails further away than the given z score.
619 
620  @param float z_score number of sigmas to include from the mean value of the histogram.
621  """
622  title = "gaus"
623  formula = "gaus"
624 
625  plot = self.plotplot
626  if plot is None:
627  raise RuntimeError('Validation plot must be filled before it can be fitted.')
628 
629  if not isinstance(plot, ROOT.TH1D):
630  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
631 
632  histogram = plot
633 
634  fit_tf1 = ROOT.TF1("Fit", formula)
635  fit_tf1.SetTitle(title)
636  fit_tf1.SetParName(0, "n")
637  fit_tf1.SetParName(1, "mean")
638  fit_tf1.SetParName(2, "std")
639 
640  n = histogram.GetSumOfWeights()
641  mean = histogram.GetMean()
642  std = histogram.GetStdDev()
643 
644  fit_tf1.SetParameter(0, n)
645  fit_tf1.SetParameter(1, mean)
646  fit_tf1.SetParameter(2, std)
647 
648  fit_options = "LM"
649  return self.fitfit(fit_tf1,
650  fit_options,
651  z_score=z_score)
652 
653  def fit_line(self):
654  """Fit a general line to a one dimensional histogram"""
655  title = "line"
656  formula = "x++1"
657  fit_tf1 = ROOT.TF1("Fit", formula)
658  fit_tf1.SetTitle(title)
659  fit_tf1.SetParName(0, "slope")
660  fit_tf1.SetParName(1, "intercept")
661  self.fitfit(fit_tf1, 'M')
662 
663  def fit_const(self):
664  """Fit a constant function to a one dimensional histogram"""
665  title = "const"
666  formula = "[0]"
667  fit_tf1 = ROOT.TF1("Fit", formula)
668  fit_tf1.SetTitle(title)
669  fit_tf1.SetParName(0, "intercept")
670  self.fitfit(fit_tf1, 'M')
671 
672  def fit_diag(self):
673  """Fit a diagonal line through the origin to a one dimensional histogram"""
674  title = "diag"
675  formula = "[0]*x"
676  fit_tf1 = ROOT.TF1("Fit", formula)
677  fit_tf1.SetTitle(title)
678  fit_tf1.SetParName(0, "slope")
679  self.fitfit(fit_tf1, 'M')
680 
681  def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
682  """Fit a user defined function to a one dimensional histogram
683 
684  Parameters
685  ----------
686  formula : str or TF1
687  Formula string or TH1 to be fitted. See TF1 constructurs for that is a valid formula
688  options : str
689  Options string to be used in the fit. See TH1::Fit()
690  lower_bound : float
691  Lower bound of the range to be fitted
692  upper_bound : float
693  Upper bound of the range to be fitted
694  """
695  plot = self.plotplot
696  if plot is None:
697  raise RuntimeError('Validation plot must be filled before it can be fitted.')
698 
699  if not isinstance(plot, ROOT.TH1D):
700  raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
701 
702  histogram = plot
703 
704  xaxis = histogram.GetXaxis()
705  n_bins = xaxis.GetNbins()
706  hist_lower_bound = xaxis.GetBinLowEdge(1)
707  hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
708 
709  if z_score is not None:
710  mean = histogram.GetMean()
711  std = histogram.GetStdDev()
712 
713  if lower_bound is None:
714  lower_bound = mean - z_score * std
715 
716  if upper_bound is None:
717  upper_bound = mean + z_score * std
718 
719  # Setup the plotting range of the function to match the histogram
720  if isinstance(formula, ROOT.TF1):
721  fit_tf1 = formula
722  fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
723  else:
724  fit_tf1 = ROOT.TF1("Fit",
725  formula,
726  hist_lower_bound,
727  hist_upper_bound)
728  get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
729 
730  # Determine fit range
731  if lower_bound is None or lower_bound < hist_lower_bound:
732  lower_bound = hist_lower_bound
733  if upper_bound is None or upper_bound > hist_upper_bound:
734  upper_bound = hist_upper_bound
735 
736  # Make sure the fitted function is not automatically added since we want to do that one our own.
737  # Look for the documentation of TH1::Fit() for details of the options.
738  if 'N' not in options:
739  options += 'N'
740 
741  fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
742 
743  self.set_fit_tf1set_fit_tf1(histogram, fit_tf1)
744  return fit_res
745 
746  def show(self):
747  """Show the plot."""
748  if self.plotplot:
749  self.plotplot.Draw()
750  else:
751  raise ValueError("Can not show a validation plot that has not been filled.")
752 
753  def write(self, tdirectory=None):
754  """Write the plot to file
755 
756  Parameters
757  ----------
758  tdirectory : ROOT.TDirectory, optional
759  ROOT directory to which the plot should be written.
760  If omitted write to the current directory
761  """
762  if not self.plotplot:
763  raise ValueError("Can not write a validation plot that has not been filled.")
764 
765  with root_cd(tdirectory):
766  ValidationPlot.set_tstyle()
767  if self.plotplot not in self.histogramshistograms:
768  self.plotplot.Write()
769 
770  # always disable ROOT's stat plot because it hides items
771  meta_options = ["nostats"]
772 
773  # add expert option, if requested
774  if self.is_expertis_expert:
775  meta_options.append("expert")
776  else:
777  meta_options.append("shifter")
778 
779  # give a custom pvalue warning / error zone if requested
780  if self.pvalue_errorpvalue_error is not None:
781  meta_options.append("pvalue-error={}".format(self.pvalue_errorpvalue_error))
782  if self.pvalue_warnpvalue_warn is not None:
783  meta_options.append("pvalue-warn={}".format(self.pvalue_warnpvalue_warn))
784 
785  # Indicator if the y axes should be displayed as a logarithmic scale
786  if self.y_logy_log:
787  meta_options.append("logy")
788 
789  meta_options_str = ",".join(meta_options)
790 
791  for histogram in self.histogramshistograms:
792  histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
793  histogram.Write()
794 
795  @property
796  def is_expert(self):
797  """Getter method if an plot plot is marked as expert plot"""
798  return self._is_expert_is_expert
799 
800  @property
801  def title(self):
802  """Getter for the plot title"""
803  return self._title_title
804 
805  @title.setter
806  def title(self, title):
807  """Setter for the plot title"""
808  self._title_title = title
809  if self.plotplot:
810  self.plotplot.SetTitle(title)
811  for histogram in self.histogramshistograms:
812  histogram.SetTitle(title)
813 
814  @property
815  def xlabel(self):
816  """Getter for the axis label at the x axis"""
817  return self._xlabel_xlabel
818 
819  @xlabel.setter
820  def xlabel(self, xlabel):
821  """Setter for the axis label at the x axis"""
822  self._xlabel_xlabel = xlabel
823  for histogram in self.histogramshistograms:
824  histogram.GetXaxis().SetTitle(xlabel)
825 
826  @property
827  def ylabel(self):
828  """Getter for the axis label at the y axis"""
829  return self._ylabel_ylabel
830 
831  @ylabel.setter
832  def ylabel(self, ylabel):
833  """Setter for the axis label at the y axis"""
834  self._ylabel_ylabel = ylabel
835  for histogram in self.histogramshistograms:
836  histogram.GetYaxis().SetTitle(ylabel)
837 
838  @property
839  def contact(self):
840  """Getter for the contact email address to be displayed on the validation page"""
841  return self._contact_contact
842 
843  @contact.setter
844  def contact(self, contact):
845  """Setter for the contact email address to be displayed on the validation page"""
846  self._contact_contact = contact
847  for histogram in self.histogramshistograms:
848  found_obj = histogram.FindObject('Contact')
849  if not found_obj:
850  tnamed = ROOT.TNamed("Contact", contact)
851  histogram.GetListOfFunctions().Add(tnamed)
852  found_obj = histogram.FindObject('Contact')
853  found_obj.SetTitle(contact)
854 
855  @property
856  def description(self):
857  """Getter for the description to be displayed on the validation page"""
858  return self._description_description
859 
860  @description.setter
861  def description(self, description):
862  """Setter for the description to be displayed on the validation page"""
863  self._description_description = description
864  for histogram in self.histogramshistograms:
865  found_obj = histogram.FindObject('Description')
866  if not found_obj:
867  tnamed = ROOT.TNamed("Description", description)
868  histogram.GetListOfFunctions().Add(tnamed)
869  found_obj = histogram.FindObject('Description')
870  found_obj.SetTitle(description)
871 
872  @property
873  def check(self):
874  """Getter for the check to be displayed on the validation page"""
875  return self._check_check
876 
877  @check.setter
878  def check(self, check):
879  """Setter for the check to be displayed on the validation page"""
880  self._check_check = check
881  for histogram in self.histogramshistograms:
882  found_obj = histogram.FindObject('Check')
883  if not found_obj:
884  tnamed = ROOT.TNamed("Check", check)
885  histogram.GetListOfFunctions().Add(tnamed)
886  found_obj = histogram.FindObject('Check')
887  found_obj.SetTitle(check)
888 
889  # Implementation details #
890  # ###################### #
891 
892  @staticmethod
893  def unpack_2d_param(param):
894  """Unpacks a function parameter for the two dimensional plots.
895 
896  If it is a pair the first parameter shall apply to the x coordinate
897  the second to the y coordinate. In this case the pair is returned as two values
898 
899  If something else is given the it is assumed that this parameter should equally apply
900  to both coordinates. In this case the same values is return twice as a pair.
901 
902  Parameters
903  ----------
904  param : pair or single value
905  Function parameter for a two dimensional plot
906 
907  Returns
908  -------
909  pair
910  A pair of values being the parameter for the x coordinate and
911  the y coordinate respectively
912  """
913  try:
914  if len(param) == 2:
915  x_param, y_param = param
916  except TypeError:
917  x_param = param
918  y_param = param
919  return x_param, y_param
920 
921  @staticmethod
922  def is_binary(xs):
923  """Determine if the data consists of boolean values"""
924  return statistics.is_binary_series(xs)
925 
926  @staticmethod
927  def is_discrete(xs, max_n_unique=None):
928  """Determine if the data consists of discrete values"""
929  return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
930 
931  @staticmethod
933  """Find exceptionally frequent values
934 
935  Parameters
936  ----------
937  xs : np.array (1d)
938  Data series
939 
940  Returns
941  -------
942  np.array (1d)
943  A list of the found exceptional values.
944  """
945  return statistics.rice_exceptional_values(xs)
946 
947  @staticmethod
949  """Does an estimation of mean and standard deviation robust against outliers.
950 
951  Parameters
952  ----------
953  xs : np.array (1d)
954  Data series
955 
956  Returns
957  -------
958  float, float
959  Pair of mean and standard deviation
960  """
961  x_mean = statistics.truncated_mean(xs)
962  x_std = statistics.trimmed_std(xs)
963  return x_mean, x_std
964 
965  @staticmethod
966  def format_bin_label(value):
967  """Formats a value to be placed at a tick on an axis."""
968  if np.isfinite(value) and value == np.round(value):
969  return str(int(value))
970  else:
971  formated_value = "{:.5g}".format(value)
972 
973  # if the label is to long, switch to shorter "e" format
974  if len(formated_value) > 8:
975  formated_value = "{:.3e}".format(value)
976  return formated_value
977 
978  def create_1d(self,
979  th1_factory,
980  xs,
981  ys=None,
982  weights=None,
983  bins=None,
984  stackby=None,
985  lower_bound=None,
986  upper_bound=None,
987  outlier_z_score=None,
988  include_exceptionals=True,
989  allow_discrete=False,
990  cumulation_direction=None):
991  """Combined factory method for creating a one dimensional histogram or a profile plot."""
992  name = self.namename
993 
994  # Coerce values to a numpy array. Do not copy if already a numpy array.
995  xs = np.array(xs, copy=False)
996 
997  if ys is not None:
998  ys = np.array(ys, copy=False)
999 
1000  if weights is not None:
1001  weights = np.array(weights, copy=False)
1002 
1003  bin_edges, bin_labels = self.determine_bin_edgesdetermine_bin_edges(xs,
1004  stackbys=stackby,
1005  bins=bins,
1006  lower_bound=lower_bound,
1007  upper_bound=upper_bound,
1008  outlier_z_score=outlier_z_score,
1009  include_exceptionals=include_exceptionals,
1010  allow_discrete=allow_discrete)
1011 
1012  n_bins = len(bin_edges) - 1
1013  self.lower_boundlower_bound = bin_edges[0]
1014  self.upper_boundupper_bound = bin_edges[-1]
1015  histogram = th1_factory(name, '', n_bins, bin_edges)
1016 
1017  if bin_labels:
1018  get_logger().info("One dimensional plot %s is discrete in x.", name)
1019  x_taxis = histogram.GetXaxis()
1020  for i_bin, bin_label in enumerate(bin_labels):
1021  x_taxis.SetBinLabel(i_bin + 1, bin_label)
1022  self.add_stats_entryadd_stats_entry(histogram, "dx", 1)
1023 
1024  else:
1025  bin_width = bin_edges[1] - bin_edges[0]
1026  self.add_stats_entryadd_stats_entry(histogram, "dx", bin_width)
1027 
1028  self.createcreate(histogram,
1029  xs,
1030  ys=ys,
1031  weights=weights,
1032  stackby=stackby,
1033  cumulation_direction=cumulation_direction,
1034  reverse_stack=True)
1035  # Reverse the stack to have the signal distribution at the bottom
1036 
1037  # Adjust the discrete bins after the filling to be equidistant
1038  if bin_labels:
1039  for histogram in self.histogramshistograms:
1040  x_taxis = histogram.GetXaxis()
1041  bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1042  x_taxis.Set(n_bins, bin_edges)
1043 
1044  def create(self,
1045  histogram_template,
1046  xs,
1047  ys=None,
1048  weights=None,
1049  stackby=None,
1050  cumulation_direction=None,
1051  reverse_stack=None):
1052  """Create histograms from a template, possibly stacked"""
1053 
1054  histograms = []
1055 
1056  if stackby is None:
1057  histogram = histogram_template
1058  self.fill_intofill_into(histogram, xs, ys, weights=weights)
1059  if cumulation_direction is not None:
1060  histogram = self.cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1061 
1062  histograms.append(histogram)
1063  plot = histogram
1064 
1065  else:
1066  stackby = np.array(stackby, copy=False)
1067  name = histogram_template.GetName()
1068 
1069  histograms = self.fill_into_groupedfill_into_grouped(histogram_template,
1070  xs,
1071  ys=ys,
1072  weights=weights,
1073  groupbys=stackby,
1074  groupby_label="stack")
1075 
1076  if cumulation_direction is not None:
1077  histograms = [self.cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1078  for histogram in histograms]
1079 
1080  plot = self.create_stackcreate_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1081 
1082  self.histogramshistograms = histograms
1083  self.plotplot = plot
1084  self.attach_attributesattach_attributes()
1085 
1086  @classmethod
1087  def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1088  """Create a stack of histograms"""
1089  if len(histograms) == 1:
1090  plot = histograms[0]
1091  else:
1092  if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1093  plot = ROOT.TMultiGraph()
1094  else:
1095  plot = ROOT.THStack()
1096 
1097  plot.SetName(name)
1098 
1099  # Add the histogram in reverse order such
1100  # that the signal usually is on the bottom an well visible
1101  if reverse_stack:
1102  for histogram in reversed(histograms):
1103  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1104  histogram = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1105  plot.Add(histogram, "APZ")
1106  else:
1107  plot.Add(histogram)
1108  else:
1109  for histogram in histograms:
1110  if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1111  histogram = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1112  plot.Add(histogram, "APZ")
1113  else:
1114  plot.Add(histogram)
1115 
1116  return plot
1117 
1118  @classmethod
1119  def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1120  """Extract errors from a TProfile histogram and create a TGraph from these"""
1121  if isinstance(tprofile, ROOT.TGraph):
1122  return tprofile
1123 
1124  x_taxis = tprofile.GetXaxis()
1125  n_bins = x_taxis.GetNbins()
1126 
1127  # bin_ids_with_underflow = list(range(n_bins + 1))
1128  bin_ids_without_underflow = list(range(1, n_bins + 1))
1129 
1130  bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1131  if abs_x:
1132  bin_centers = np.abs(bin_centers)
1133  bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1134  bin_x_errors = bin_widths / 2.0
1135 
1136  # Now for the histogram content not including the underflow
1137  bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1138  bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1139 
1140  tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1141 
1142  tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1143 
1144  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1145  tgrapherrors.SetLineColor(tprofile.GetLineColor())
1146 
1147  # Copy all functions and stats entries
1148  for tobject in tprofile.GetListOfFunctions():
1149  tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1150 
1151  # Add stats entries that are displayed for profile plots
1152  cls.add_stats_entryadd_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1153 
1154  stats_values = np.array([np.nan] * 6)
1155  tprofile.GetStats(stats_values)
1156 
1157  sum_w = stats_values[0]
1158  # sum_w2 = stats_values[1]
1159  sum_wx = stats_values[2]
1160  sum_wx2 = stats_values[3]
1161  sum_wy = stats_values[4]
1162  sum_wy2 = stats_values[5]
1163 
1164  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1165  "x avg",
1166  sum_wx / sum_w)
1167 
1168  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1169  "x std",
1170  np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1171 
1172  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1173  "y avg",
1174  sum_wy / sum_w)
1175 
1176  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1177  "y std",
1178  np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1179 
1180  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1181  'cov',
1182  tgrapherrors.GetCovariance())
1183 
1184  cls.add_stats_entryadd_stats_entry(tgrapherrors,
1185  'corr',
1186  tgrapherrors.GetCorrelationFactor())
1187 
1188  return tgrapherrors
1189 
1191  histogram_template,
1192  xs,
1193  ys=None,
1194  weights=None,
1195  groupbys=None,
1196  groupby_label="group"):
1197  """Fill data into similar histograms in groups indicated by a groupby array"""
1198 
1199  histograms = []
1200  unique_groupbys = np.unique(groupbys)
1201  name = histogram_template.GetName()
1202 
1203  for i_value, value in enumerate(unique_groupbys):
1204  if np.isnan(value):
1205  indices_for_value = np.isnan(groupbys)
1206  else:
1207  indices_for_value = groupbys == value
1208 
1209  # Make a copy of the empty histogram
1210  histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1211  i_root_color = i_value + 1
1212 
1213  self.set_colorset_color(histogram_for_value, i_root_color)
1214 
1215  if groupby_label:
1216  self.add_stats_entryadd_stats_entry(histogram_for_value, groupby_label, value)
1217 
1218  self.fill_intofill_into(histogram_for_value,
1219  xs,
1220  ys=ys,
1221  weights=weights,
1222  filter=indices_for_value)
1223 
1224  histograms.append(histogram_for_value)
1225 
1226  return histograms
1227 
1228  def set_color(self, tobject, root_i_color):
1229  """Set the color of the ROOT object.
1230 
1231  By default the line color of a TGraph should be invisible, so do not change it
1232  For other objects set the marker and the line color
1233 
1234  Parameters
1235  ----------
1236  tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1237  Object of which the color should be set.
1238  root_i_color : int
1239  Color index of the ROOT color table
1240  """
1241  if isinstance(tobject, ROOT.TGraph):
1242  tobject.SetMarkerColor(root_i_color)
1243  else:
1244  tobject.SetLineColor(root_i_color)
1245  tobject.SetMarkerColor(root_i_color)
1246 
1247  def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1248  """Fill the data into the plot object"""
1249  if isinstance(plot, ROOT.TGraph):
1250  if ys is None:
1251  raise ValueError("ys are required for filling a graph")
1252  self.fill_into_tgraphfill_into_tgraph(plot, xs, ys, filter=filter)
1253  elif isinstance(plot, ROOT.TGraphErrors):
1254  if ys is None:
1255  raise ValueError("ys are required for filling a graph error")
1256 
1257  self.fill_into_tgrapherrorfill_into_tgrapherror(plot, xs, ys)
1258  else:
1259  self.fill_into_th1fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1260 
1261  def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1262  """fill point values and error of the x and y axis into the graph"""
1263 
1264  assert(len(xs[0]) == len(ys[0]))
1265  # set the overall amount of points
1266  graph.Set(len(xs[0]))
1267 
1268  for i in range(len(xs[0])):
1269  graph.SetPoint(i, xs[0][i], ys[0][i])
1270  graph.SetPointError(i, xs[1][i], ys[1][i])
1271 
1272  def fill_into_tgraph(self, graph, xs, ys, filter=None):
1273  """Fill the data into a TGraph"""
1274 
1275  # Save some ifs by introducing a dummy slicing as a non filter
1276  if filter is None:
1277  filter = slice(None)
1278 
1279  xs = xs[filter]
1280  ys = ys[filter]
1281 
1282  max_n_data = 100000
1283  x_n_data = len(xs)
1284  y_n_data = len(ys)
1285 
1286  if max_n_data:
1287  if x_n_data > max_n_data or y_n_data > max_n_data:
1288  get_logger().warning("Number of points in scatter graph %s exceed limit %s" %
1289  (self.namename, max_n_data))
1290 
1291  get_logger().warning("Cropping %s" % max_n_data)
1292 
1293  xs = xs[0:max_n_data]
1294  ys = ys[0:max_n_data]
1295 
1296  x_axis = graph.GetXaxis()
1297  y_axis = graph.GetYaxis()
1298 
1299  x_lower_bound = x_axis.GetXmin()
1300  x_upper_bound = x_axis.GetXmax()
1301 
1302  y_lower_bound = y_axis.GetXmin()
1303  y_upper_bound = y_axis.GetXmax()
1304 
1305  x_underflow_indices = xs < x_lower_bound
1306  x_overflow_indices = xs > x_upper_bound
1307 
1308  y_underflow_indices = ys < y_lower_bound
1309  y_overflow_indices = ys > y_upper_bound
1310 
1311  plot_indices = ~(np.isnan(xs) |
1312  x_underflow_indices |
1313  x_overflow_indices |
1314  np.isnan(ys) |
1315  y_underflow_indices |
1316  y_overflow_indices)
1317 
1318  n_plot_data = np.sum(plot_indices)
1319  plot_xs = xs[plot_indices]
1320  plot_ys = ys[plot_indices]
1321 
1322  graph.Set(int(n_plot_data))
1323  for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1324  graph.SetPoint(i, x, y)
1325 
1326  self.add_stats_entryadd_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1327 
1328  self.add_nan_inf_statsadd_nan_inf_stats(graph, 'x', xs)
1329  self.add_nan_inf_statsadd_nan_inf_stats(graph, 'y', ys)
1330 
1331  x_n_underflow = np.sum(x_underflow_indices)
1332  if x_n_underflow:
1333  self.add_stats_entryadd_stats_entry(graph, 'x underf.', x_n_underflow)
1334 
1335  x_n_overflow = np.sum(x_overflow_indices)
1336  if x_n_overflow:
1337  self.add_stats_entryadd_stats_entry(graph, 'x overf.', x_n_overflow)
1338 
1339  y_n_underflow = np.sum(y_underflow_indices)
1340  if y_n_underflow:
1341  self.add_stats_entryadd_stats_entry(graph, 'y underf.', y_n_underflow)
1342 
1343  y_n_overflow = np.sum(y_overflow_indices)
1344  if y_n_overflow:
1345  self.add_stats_entryadd_stats_entry(graph, 'y overf.', y_n_overflow)
1346 
1347  self.add_stats_entryadd_stats_entry(graph, 'x avg', graph.GetMean(1))
1348  self.add_stats_entryadd_stats_entry(graph, 'x std', graph.GetRMS(1))
1349 
1350  self.add_stats_entryadd_stats_entry(graph, 'y avg', graph.GetMean(2))
1351  self.add_stats_entryadd_stats_entry(graph, 'y std', graph.GetRMS(2))
1352 
1353  self.add_stats_entryadd_stats_entry(graph, 'cov', graph.GetCovariance())
1354  self.add_stats_entryadd_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1355 
1356  def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1357  """Fill the histogram blocking non finite values
1358 
1359  Parameters
1360  ----------
1361  histogram : ROOT.TH1
1362  The histogram to be filled
1363  xs : numpy.ndarray (1d)
1364  Data for the first axes
1365  ys : numpy.ndarray (1d), optional
1366  Data for the second axes
1367  weights : numpy.ndarray (1d), optional
1368  Weight of the individual points. Defaults to one for each
1369  filter : numpy.ndarray, optional
1370  Boolean index array indicating which entries shall be taken.
1371  """
1372  # Save some ifs by introducing a dummy slicing as a non filter
1373  if filter is None:
1374  filter = slice(None)
1375 
1376  xs = xs[filter]
1377  # Count the nan and inf values in x
1378  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'x', xs)
1379  finite_filter = np.isfinite(xs)
1380 
1381  if ys is not None:
1382  ys = ys[filter]
1383  # Count the nan and inf values in y
1384  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'y', ys)
1385  finite_filter &= np.isfinite(ys)
1386 
1387  if weights is None:
1388  xs = xs[finite_filter]
1389  weights = np.ones_like(xs)
1390  else:
1391  weights = weights[filter]
1392  self.add_nan_inf_statsadd_nan_inf_stats(histogram, 'w', weights)
1393  finite_filter &= np.isfinite(weights)
1394  xs = xs[finite_filter]
1395  weights[finite_filter]
1396 
1397  if ys is not None:
1398  ys = ys[finite_filter]
1399 
1400  # Now fill the actual histogram
1401  try:
1402  histogram.FillN
1403  except AttributeError:
1404  Fill = histogram.Fill
1405  if ys is None:
1406  fill = np.frompyfunc(Fill, 2, 1)
1407  fill(xs.astype(np.float64, copy=False),
1408  weights.astype(np.float64, copy=False))
1409  else:
1410  fill = np.frompyfunc(Fill, 3, 1)
1411  fill(xs.astype(np.float64, copy=False),
1412  ys.astype(np.float64, copy=False),
1413  weights.astype(np.float64, copy=False))
1414  else:
1415  if ys is None:
1416  # Make the array types compatible with the ROOT interface if necessary
1417  xs = xs.astype(np.float64, copy=False)
1418  weights = weights.astype(np.float64, copy=False)
1419  n = len(xs)
1420  if n != 0:
1421  histogram.FillN(n, xs, weights)
1422  else:
1423  basf2.B2WARNING("No values to be filled into histogram: " + self.namename)
1424 
1425  else:
1426  # Make the array types compatible with the ROOT interface if necessary
1427  xs = xs.astype(np.float64, copy=False)
1428  ys = ys.astype(np.float64, copy=False)
1429  weights = weights.astype(np.float64, copy=False)
1430  n = len(xs)
1431  if n != 0:
1432  histogram.FillN(n, xs, ys, weights)
1433  else:
1434  basf2.B2WARNING("No values to be filled into histogram: " + self.namename)
1435 
1436  self.set_additional_stats_tf1set_additional_stats_tf1(histogram)
1437 
1438  @classmethod
1439  def add_nan_inf_stats(cls, histogram, name, xs):
1440  """ Extracts the counts of non finite floats from a series
1441  and adds them as additional statistics to the histogram.
1442 
1443  Parameters
1444  ----------
1445  histogram : derived from ROOT.TH1 or ROOT.TGraph
1446  Something having a GetListOfFunctions method that
1447  name : str
1448  A label for the data series to be prefixed to the entries.
1449  xs : numpy.ndarray (1d)
1450  Data from which the non finit floats should be counted.
1451  """
1452  n_nans = np.isnan(xs).sum()
1453  if n_nans > 0:
1454  cls.add_stats_entryadd_stats_entry(histogram, name + ' nan', n_nans)
1455 
1456  n_positive_inf = np.sum(xs == np.inf)
1457  if n_positive_inf > 0:
1458  cls.add_stats_entryadd_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1459 
1460  n_negative_inf = np.sum(xs == -np.inf)
1461  if n_negative_inf > 0:
1462  cls.add_stats_entryadd_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1463 
1464  @classmethod
1465  def add_stats_entry(cls, histogram, label, value):
1466  """Add a new additional statistics to the histogram.
1467 
1468  Parameters
1469  ----------
1470  histogram : derived from ROOT.TH1 or ROOT.TGraph
1471  Something having a GetListOfFunctions method that holds the additional statistics
1472  label : str
1473  Label of the statistic
1474  value : float
1475  Value of the statistic
1476  """
1477  stats_entry = StatsEntry(str(label), float(value))
1478  histogram.GetListOfFunctions().Add(stats_entry)
1479  cls.set_additional_stats_tf1set_additional_stats_tf1(histogram)
1480 
1481  @classmethod
1482  def get_additional_stats(cls, histogram):
1483  """Get the additional statistics from the histogram and return them a dict.
1484 
1485  Parameters
1486  ----------
1487  histogram : derived from ROOT.TH1 or ROOT.TGraph
1488  Something having a GetListOfFunctions method that holds the additional statistics
1489 
1490  Returns
1491  -------
1492  collection.OrderedDict
1493  A map of labels to values for the additional statistics
1494  """
1495  additional_stats = collections.OrderedDict()
1496  for tobject in histogram.GetListOfFunctions():
1497  if isinstance(tobject, StatsEntry):
1498  stats_entry = tobject
1499  label = stats_entry.GetName()
1500  value = stats_entry.GetVal()
1501  additional_stats[label] = value
1502  return additional_stats
1503 
1504  @classmethod
1505  def gaus_slice_fit(cls, th2, name, z_score=None):
1506  """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1507  # profile = th2.ProfileX(name)
1508 
1509  y_taxis = th2.GetYaxis()
1510  th2_lower_bound = y_taxis.GetXmin()
1511  th2_upper_bound = y_taxis.GetXmax()
1512  th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1513  n_y_bins = y_taxis.GetNbins()
1514  if z_score:
1515  y_mean = th2.GetMean(2)
1516  y_std = th2.GetStdDev(2)
1517  fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1518  fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1519  fit_height = fit_upper_bound - fit_lower_bound
1520  # Require all covered bins to be filled
1521  required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1522  else:
1523  fit_lower_bound = th2_lower_bound
1524  fit_upper_bound = th2_upper_bound
1525  fit_height = fit_upper_bound - fit_lower_bound
1526  required_n_bins_inslice_filled = n_y_bins / 1.61
1527 
1528  # Highest required number of bins should be a third
1529  required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1530 
1531  fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1532  fit_tf1.SetParName(0, "n")
1533  fit_tf1.SetParName(1, "mean")
1534  fit_tf1.SetParName(2, "std")
1535  i_first_bin = 0
1536  i_last_bin = -1
1537  fit_options = "QNR"
1538  param_fit_th1s = ROOT.TObjArray()
1539  th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1540  int(required_n_bins_inslice_filled),
1541  fit_options, param_fit_th1s)
1542 
1543  th1_means = param_fit_th1s.At(1)
1544  th1_means.SetName(name)
1545  th1_means.SetTitle(th2.GetTitle())
1546  for label, value in cls.get_additional_statsget_additional_stats(th2).items():
1547  cls.add_stats_entryadd_stats_entry(th1_means, label, value)
1548 
1549  # Manually copy labels grumble grumble
1550  x_taxis = th2.GetXaxis()
1551  new_x_taxis = th1_means.GetXaxis()
1552  for i_bin in range(x_taxis.GetNbins() + 2):
1553  label = x_taxis.GetBinLabel(i_bin)
1554  if label != "":
1555  new_x_taxis.SetBinLabel(i_bin, label)
1556 
1557  # Adjust plot bound to reflect the fit range.
1558  data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1559  data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1560  data_height = data_upper_bound - data_lower_bound
1561 
1562  plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1563  plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1564 
1565  th1_means.SetMinimum(plot_lower_bound)
1566  th1_means.SetMaximum(plot_upper_bound)
1567 
1568  return th1_means
1569 
1570  @classmethod
1571  def cumulate(cls, histogram, cumulation_direction=None):
1572  """Cumulates the histogram inplace.
1573 
1574  Parameters
1575  ----------
1576  histogram : ROOT.TH1 or ROOT.TProfile
1577  Filled histogram to be cumulated
1578  cumulation_direction : int, optional
1579  Direction is indicated by the sign.
1580  Positive means from left to right, negative means from right to left.
1581  If now cumulation direction is given return the histogram as is.
1582 
1583  Returns
1584  -------
1585  ROOT.TH1
1586  Cumulated histogram potentially altered inplace.
1587  """
1588  if not cumulation_direction:
1589  return histogram
1590 
1591  cumulate_backward = cumulation_direction < 0
1592  cumulate_forward = not cumulate_backward
1593 
1594  if isinstance(histogram, ROOT.TH2):
1595  raise ValueError("Cannot cumulate a two dimensional histogram.")
1596 
1597  if isinstance(histogram, ROOT.TH3):
1598  raise ValueError("Cannot cumulate a three dimensional histogram.")
1599 
1600  if not isinstance(histogram, ROOT.TH1):
1601  raise ValueError("Can only cumulate a one dimensional histogram.")
1602 
1603  if isinstance(histogram, ROOT.TProfile):
1604  tprofile = histogram
1605  # Turn the profile histogram into graph where we can set the new content and errors
1606  tgraph = cls.convert_tprofile_to_tgrapherrorsconvert_tprofile_to_tgrapherrors(histogram)
1607  tgraph.SetName(tprofile.GetName())
1608 
1609  n_bins = histogram.GetNbinsX()
1610  n_points = n_bins
1611  cumulated_content = 0.0
1612  cumulated_entries = 0
1613  cumulated_std = 0.0
1614 
1615  # Always include the overflow bins.
1616  i_bins = list(range(0, n_bins + 2))
1617  if not cumulate_forward:
1618  i_bins = reversed(i_bins)
1619 
1620  for i_bin in i_bins:
1621  i_point = i_bin - 1
1622  bin_content = tprofile.GetBinContent(i_bin)
1623  bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1624  bin_std = tprofile.GetBinError(i_bin)
1625 
1626  if bin_entries != 0:
1627  cumulated_content = (
1628  1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1629  (cumulated_entries + bin_entries)
1630  )
1631 
1632  cumulated_std = (
1633  math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1634  (cumulated_entries + bin_entries)
1635  )
1636 
1637  cumulated_entries = cumulated_entries + bin_entries
1638  else:
1639  # empty bin does not add anything to the cumulation
1640  pass
1641 
1642  if i_point >= 0 and i_point < n_points:
1643  x = tgraph.GetX()[i_point]
1644  # x = ROOT.Double()
1645  # y = ROOT.Double()
1646  # tgraph.GetPoint(i_point, x, y)
1647  tgraph.SetPoint(i_point, x, cumulated_content)
1648 
1649  x_error = tgraph.GetErrorX(i_point)
1650  tgraph.SetPointError(i_point, x_error, cumulated_std)
1651  return tgraph
1652 
1653  else:
1654  # Always include the overflow bins.
1655  n_bins = histogram.GetNbinsX()
1656  cumulated_content = 0.0
1657 
1658  i_bins = list(range(0, n_bins + 2))
1659  if not cumulate_forward:
1660  i_bins = reversed(i_bins)
1661 
1662  for i_bin in i_bins:
1663  bin_content = histogram.GetBinContent(i_bin)
1664  cumulated_content += bin_content
1665  histogram.SetBinContent(i_bin, cumulated_content)
1666 
1667  return histogram
1668 
1670  xs,
1671  stackbys=None,
1672  bins=None,
1673  lower_bound=None,
1674  upper_bound=None,
1675  outlier_z_score=None,
1676  include_exceptionals=True,
1677  allow_discrete=False):
1678  """Deducing bin edges from a data series.
1679 
1680  Parameters
1681  ----------
1682  xs : numpy.ndarray (1d)
1683  Data point for which a binning should be found.
1684  stackbys : numpy.ndarray (1d)
1685  Categories of the data points to be distinguishable
1686  bins : list(float) or int or None, optional
1687  Preset bin edges or preset number of desired bins.
1688  The default, None, means the bound should be extracted from data.
1689  The rice rule is used the determine the number of bins.
1690  If a list of floats is given return them immediatly.
1691  lower_bound : float or None, optional
1692  Preset lower bound of the binning range.
1693  The default, None, means the bound should be extracted from data.
1694  upper_bound : float or None, optional
1695  Preset upper bound of the binning range.
1696  The default, None, means the bound should be extracted from data.
1697  outlier_z_score : float or None, optional
1698  Threshold z-score of outlier detection.
1699  The default, None, means no outlier detection.
1700  include_exceptionals : bool, optional
1701  If the outlier detection is active this switch indicates,
1702  if values detected as exceptionally frequent shall be included
1703  nevertheless into the binning range. Default is True,
1704  which means exceptionally frequent values as included
1705  even if they are detected as outliers.
1706 
1707  Returns
1708  -------
1709  np.array (1d), list(str)
1710  Pair of bin edges and labels deduced from the series.
1711  Second element is None if the series is not detected as discrete.
1712  """
1713  debug = get_logger().debug
1714  debug('Determine binning for plot named %s', self.namename)
1715 
1716  if bins == 'flat':
1717  # Special value for the flat distribution binning
1718  n_bins = None
1719 
1720  elif isinstance(bins, collections.Iterable):
1721  # Bins is considered as an array
1722  # Construct a float array forwardable to root.
1723  bin_edges = bins
1724  bin_edges = array.array('d', bin_edges)
1725  bin_labels = None
1726  return bin_edges, bin_labels
1727 
1728  # If bins is not an iterable assume it is the number of bins or None
1729  elif bins is None:
1730  n_bins = None
1731  else:
1732  # Check that bins can be coerced to an integer.
1733  n_bins = int(bins)
1734 
1735  # Do not allow negative bin numbers
1736  if not n_bins > 0:
1737  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1738  raise ValueError(message)
1739 
1740  # Coerce values to a numpy array. Do not copy if already a numpy array.
1741  xs = np.array(xs, copy=False)
1742 
1743  if self.is_binaryis_binary(xs) or (allow_discrete and self.is_discreteis_discrete(xs)):
1744  # This covers also the case
1745  debug('Discrete binning values encountered')
1746  finite_xs = xs[np.isfinite(xs)]
1747  unique_xs = np.unique(finite_xs)
1748 
1749  # Crop the unique values between the lower and upper bound
1750  if lower_bound is None:
1751  if len(unique_xs) == 0:
1752  if upper_bound is None:
1753  lower_bound = 0
1754  else:
1755  lower_bound = upper_bound - 1
1756  else:
1757  lower_bound = np.min(unique_xs)
1758  else:
1759  unique_xs = unique_xs[unique_xs >= lower_bound]
1760 
1761  if upper_bound is None:
1762  if len(unique_xs) == 0:
1763  upper_bound = lower_bound + 1
1764  else:
1765  upper_bound = np.min(unique_xs)
1766  else:
1767  unique_xs = unique_xs[unique_xs <= upper_bound]
1768 
1769  if n_bins is None:
1770  n_bins = len(unique_xs) or 1
1771 
1772  if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1773  # Construct a float array forwardable to root.
1774  bin_edges = array.array('d', unique_xs)
1775  format_bin_label = self.format_bin_labelformat_bin_label
1776  bin_labels = [format_bin_label(value) for value in bin_edges]
1777  bin_edges.append(bin_edges[-1] + 1)
1778  return bin_edges, bin_labels
1779 
1780  else:
1781  # Ambiguous case what to do in case of a number of requested bins
1782  # that is lower than the number of unique values?
1783 
1784  # Continue with an equistant binning for now.
1785  pass
1786 
1787  debug('Lower bound %s', lower_bound)
1788  debug('Upper bound %s', upper_bound)
1789  debug('N bins %s', n_bins)
1790 
1791  else:
1792  bin_range = self.determine_bin_rangedetermine_bin_range(xs,
1793  stackbys=stackbys,
1794  n_bins=n_bins,
1795  lower_bound=lower_bound,
1796  upper_bound=upper_bound,
1797  outlier_z_score=outlier_z_score,
1798  include_exceptionals=include_exceptionals)
1799 
1800  n_bins, lower_bound, upper_bound = bin_range
1801 
1802  n_bin_edges = n_bins + 1
1803  if lower_bound != upper_bound:
1804  if bins == "flat":
1805  debug("Creating flat distribution binning")
1806  precentiles = np.linspace(0.0, 100.0, n_bin_edges)
1807  bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], precentiles))
1808  else:
1809  # Correct the upper bound such that all values are strictly smaller than the upper bound
1810  # Make one step in single precision in the positive direction
1811  bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1812 
1813  # Reinforce the upper and lower bound to be exact
1814  # Also expand the upper bound by an epsilon
1815  # to prevent the highest value in xs from going in the overflow bin
1816  bin_edges[0] = lower_bound
1817  bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1818  debug('Bins %s', bin_edges)
1819 
1820  else:
1821  # Fall back if the array contains only one value
1822  bin_edges = [lower_bound, upper_bound + 1]
1823 
1824  # Construct a float array forwardable to root.
1825  bin_edges = array.array('d', bin_edges)
1826  debug('Bins %s for %s', bin_edges, self.namename)
1827  return bin_edges, None
1828 
1830  xs,
1831  stackbys=None,
1832  n_bins=None,
1833  lower_bound=None,
1834  upper_bound=None,
1835  outlier_z_score=None,
1836  include_exceptionals=True):
1837  """Calculates the number of bins, the lower bound and the upper bound from a given data series
1838  estimating the values that are not given.
1839 
1840  If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1841  The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1842  mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1843  quantile range (IQR).
1844 
1845  If additional include_exceptionals is true the method tries to find exceptional values in the series
1846  and always include them in the range if it finds any.
1847  Exceptional values means exact values that appear often in the series for whatever reason.
1848  Possible reasons include
1849  * Interal / default values
1850  * Failed evaluation conditions
1851  * etc.
1852  which should be not cropped away automatically if you are locking on the quality of your data.
1853 
1854  Parameters
1855  ----------
1856  xs : numpy.ndarray (1d)
1857  Data point for which a binning should be found.
1858  stackbys : numpy.ndarray (1d)
1859  Categories of the data points to be distinguishable
1860  n_bins : int or None, optional
1861  Preset number of desired bins. The default, None, means the bound should be extracted from data.
1862  The rice rule is used the determine the number of bins.
1863  lower_bound : float or None, optional
1864  Preset lower bound of the binning range.
1865  The default, None, means the bound should be extracted from data.
1866  upper_bound : float or None, optional
1867  Preset upper bound of the binning range.
1868  The default, None, means the bound should be extracted from data.
1869  outlier_z_score : float or None, optional
1870  Threshold z-score of outlier detection.
1871  The default, None, means no outlier detection.
1872  include_exceptionals : bool, optional
1873  If the outlier detection is active this switch indicates,
1874  if values detected as exceptionally frequent shall be included
1875  nevertheless into the binning range. Default is True,
1876  which means exceptionally frequent values as included
1877  even if they are detected as outliers.
1878 
1879  Returns
1880  -------
1881  n_bins, lower_bound, upper_bound : int, float, float
1882  A triple of found number of bins, lower bound and upper bound of the binning range.
1883  """
1884 
1885  if stackbys is not None:
1886  unique_stackbys = np.unique(stackbys)
1887  stack_ranges = []
1888  for value in unique_stackbys:
1889  if np.isnan(value):
1890  indices_for_value = np.isnan(stackbys)
1891  else:
1892  indices_for_value = stackbys == value
1893 
1894  stack_lower_bound, stack_upper_bound = \
1895  self.determine_rangedetermine_range(xs[indices_for_value],
1896  lower_bound=lower_bound,
1897  upper_bound=upper_bound,
1898  outlier_z_score=outlier_z_score,
1899  include_exceptionals=include_exceptionals)
1900 
1901  stack_ranges.append([stack_lower_bound, stack_upper_bound])
1902 
1903  lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1904  upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1905 
1906  else:
1907  lower_bound, upper_bound = self.determine_rangedetermine_range(xs,
1908  lower_bound=lower_bound,
1909  upper_bound=upper_bound,
1910  outlier_z_score=outlier_z_score,
1911  include_exceptionals=include_exceptionals)
1912 
1913  if n_bins is None:
1914  # Assume number of bins according to the rice rule.
1915  # The number of data points should not include outliers.
1916  n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1917  rice_n_bins = int(statistics.rice_n_bin(n_data))
1918  n_bins = rice_n_bins
1919 
1920  else:
1921  n_bins = int(n_bins)
1922  # Do not allow negative bin numbers
1923  if not n_bins > 0:
1924  message = 'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % n_bins
1925  raise ValueError(message)
1926 
1927  return n_bins, lower_bound, upper_bound
1928 
1930  xs,
1931  lower_bound=None,
1932  upper_bound=None,
1933  outlier_z_score=None,
1934  include_exceptionals=True):
1935  """
1936  Parameters
1937  ----------
1938  xs : numpy.ndarray (1d)
1939  Data point for which a binning should be found.
1940  lower_bound : float or None, optional
1941  Preset lower bound of the binning range.
1942  The default, None, means the bound should be extracted from data.
1943  upper_bound : float or None, optional
1944  Preset upper bound of the binning range.
1945  The default, None, means the bound should be extracted from data.
1946  outlier_z_score : float or None, optional
1947  Threshold z-score of outlier detection.
1948  The default, None, means no outlier detection.
1949  include_exceptionals : bool, optional
1950  If the outlier detection is active this switch indicates,
1951  if values detected as exceptionally frequent shall be included
1952  nevertheless into the binning range. Default is True,
1953  which means exceptionally frequent values as included
1954  even if they are detected as outliers.
1955 
1956  Returns
1957  -------
1958  lower_bound, upper_bound : float, float
1959  A pair of found lower bound and upper bound of series.
1960  """
1961  debug = get_logger().debug
1962 
1963  finite_xs_indices = np.isfinite(xs)
1964  if np.any(finite_xs_indices):
1965  finite_xs = xs[finite_xs_indices]
1966  else:
1967  finite_xs = xs
1968 
1969  make_symmetric = False
1970  exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
1971 
1972  # Look for exceptionally frequent values in the series, e.g. interal delta values like -999
1973  if include_exceptionals or exclude_outliers:
1974  exceptional_xs = self.get_exceptional_valuesget_exceptional_values(finite_xs)
1975  exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1976 
1977  # Prepare for the estimation of outliers
1978  if exclude_outliers:
1979  if not np.all(exceptional_indices):
1980  # Exclude excecptional values from the estimation to be unbiased
1981  # even in case exceptional values fall into the central region near the mean
1982  x_mean, x_std = self.get_robust_mean_and_stdget_robust_mean_and_std(finite_xs[~exceptional_indices])
1983  else:
1984  x_mean, x_std = np.nan, np.nan
1985 
1986  make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
1987 
1988  if include_exceptionals and len(exceptional_xs) != 0:
1989  lower_exceptional_x = np.min(exceptional_xs)
1990  upper_exceptional_x = np.max(exceptional_xs)
1991  make_symmetric = False
1992  else:
1993  lower_exceptional_x = np.nan
1994  upper_exceptional_x = np.nan
1995 
1996  # Find the lower bound, if it is not given.
1997  if lower_bound is None:
1998  try:
1999  lower_bound = np.min(finite_xs)
2000  except ValueError:
2001  lower_bound = -999
2002  # Clip the lower bound by outliers that exceed the given z score
2003  if outlier_z_score is not None:
2004  # The lower bound at which outliers exceed the given z score
2005  lower_outlier_bound = x_mean - outlier_z_score * x_std
2006 
2007  # Clip the lower bound such that it concides with an actual value,
2008  # which prevents empty bins from being produced
2009  indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2010 
2011  if np.any(indices_above_lower_outlier_bound):
2012  lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2013 
2014  # However we want to include at least the exceptional values in the range if there are any.
2015  lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2016 
2017  debug('Lower bound after outlier detection')
2018  debug('Lower bound %s', lower_bound)
2019  debug('Lower outlier bound %s', lower_outlier_bound)
2020 
2021  # Find the upper bound, if it is not given
2022  if upper_bound is None:
2023  try:
2024  upper_bound = np.max(finite_xs)
2025  except ValueError:
2026  upper_bound = 999
2027  if outlier_z_score is not None:
2028  # The upper bound at which outliers exceed the given z score
2029  upper_outlier_bound = x_mean + outlier_z_score * x_std
2030 
2031  # Clip the upper bound such that it concides with an actual value,
2032  # which prevents empty bins from being produced
2033  indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2034 
2035  if np.any(indices_below_upper_outlier_bound):
2036  upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2037 
2038  # However we want to include at least the exceptional values in the range if there are any.
2039  upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2040 
2041  debug('Upper bound after outlier detection')
2042  debug('Upper bound %s', upper_bound)
2043  debug('Upper outlier bound %s', upper_outlier_bound)
2044 
2045  if make_symmetric and lower_bound < 0 and upper_bound > 0:
2046  if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2047  abs_bound = max(abs(lower_bound), abs(upper_bound))
2048  lower_bound = -abs_bound
2049  upper_bound = abs_bound
2050 
2051  return lower_bound, upper_bound
2052 
2053  @classmethod
2054  def set_additional_stats_tf1(cls, histogram):
2055  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2056 
2057  Parameters
2058  ----------
2059  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2060  Something having a GetListOfFunctions method that should hold
2061  the combined fit and additional statistics function.
2062  """
2063  additional_stats_tf1 = cls.create_additional_stats_tf1create_additional_stats_tf1(histogram)
2064  cls.set_tf1set_tf1(histogram, additional_stats_tf1)
2065 
2066  @classmethod
2067  def set_fit_tf1(cls, histogram, fit_tf1):
2068  """Combining fit TF1 with the additional statistics and attach them to the histogram.
2069 
2070  Parameters
2071  ----------
2072  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2073  Something having a GetListOfFunctions method that should hold
2074  the combined fit and additional statistics function.
2075  """
2076  additional_stats_tf1 = cls.create_additional_stats_tf1create_additional_stats_tf1(histogram)
2077  combined_tf1 = cls.combine_fit_and_additional_statscombine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2078  cls.set_tf1set_tf1(histogram, combined_tf1)
2079 
2080  @classmethod
2081  def set_tf1(cls, histogram, tf1):
2082  """Set the attached TF1 of the histogram.
2083 
2084  Parameters
2085  ----------
2086  histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2087  Something having a GetListOfFunctions method that should hold
2088  the combined fit and additional statistics function.
2089  """
2090  # Delete any functions formally added
2091  cls.delete_tf1delete_tf1(histogram)
2092  if tf1 is not None:
2093  tf1.SetName("FitAndStats")
2094  histogram.GetListOfFunctions().Add(tf1)
2095 
2096  @classmethod
2097  def delete_tf1(cls, histogram):
2098  """Delete the attached TF1 from the histogram
2099 
2100  Parameters
2101  ----------
2102  histogram : ROOT.TH1 or ROOT.TGraph
2103  Something having a GetListOfFunctions method that holds the fit function
2104  """
2105  tf1 = histogram.FindObject("FitAndStats")
2106  if tf1:
2107  function_list = histogram.GetListOfFunctions()
2108  function_list.Remove(tf1)
2109 
2110  @classmethod
2111  def create_additional_stats_tf1(cls, histogram):
2112  """Create a TF1 with the additional statistics from the histogram as parameters.
2113 
2114  Parameters
2115  ----------
2116  histogram : ROOT.TH1 or ROOT.TGraph
2117  Something having a GetListOfFunctions method that holds the additional statistics.
2118 
2119  Returns
2120  -------
2121  ROOT.TF1
2122  Function with the additional statistics as parameters.
2123  """
2124 
2125  additional_stats = cls.get_additional_statsget_additional_stats(histogram)
2126  if not additional_stats:
2127  return None
2128 
2129  # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2130  # Dummy range to serve the functions
2131  lower_bound = 0
2132  upper_bound = 0
2133 
2134  # Create a formula which is zero in all cases but has space for n parameters
2135  # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2136  formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2137 
2138  # Compose a function that carries the addtional information
2139  additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2140 
2141  for (i, (label, value)) in enumerate(additional_stats.items()):
2142  # root 6 does not like labels with spaces ..
2143  # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2144  # therefor this workaround:
2145  label = label.replace(" ", "-")
2146  additional_stats_tf1.SetParName(i, label)
2147  additional_stats_tf1.FixParameter(i, value)
2148 
2149  return additional_stats_tf1
2150 
2151  @classmethod
2152  def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2153  """Combine the fit function and the function carrying the additional statistics to one function.
2154 
2155  Parameters
2156  ----------
2157  fit_tf1 : ROOT.TF1
2158  The fit function
2159  additional_stats_tf1 : ROOT.TF1
2160  The function carrying the additional statistics as parameters
2161 
2162  Returns
2163  -------
2164  ROOT.TF1
2165  """
2166  if additional_stats_tf1 is None:
2167  return fit_tf1
2168 
2169  # Combine both TF1 functions
2170  # Get the lower and upper bound of the fit
2171  # Use the pass-by reference containers from pyROOT to be able to call the function
2172  lower_bound = ctypes.c_double()
2173  upper_bound = ctypes.c_double()
2174  fit_tf1.GetRange(lower_bound, upper_bound)
2175  title = fit_tf1.GetTitle()
2176 
2177  combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2178  combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound.value, upper_bound.value)
2179  combined_tf1.SetTitle(title)
2180 
2181  # Transfer the fitted parameters
2182  chi2 = fit_tf1.GetChisquare()
2183  combined_tf1.SetChisquare(chi2)
2184 
2185  ndf = fit_tf1.GetNDF()
2186  combined_tf1.SetNDF(ndf)
2187 
2188  n_stats_parameters = additional_stats_tf1.GetNpar()
2189  # n_fit_parameters = fit_tf1.GetNpar()
2190  cls.copy_tf1_parameterscopy_tf1_parameters(additional_stats_tf1, combined_tf1)
2191  cls.copy_tf1_parameterscopy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2192 
2193  return combined_tf1
2194 
2195  @classmethod
2196  def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2197  """Copy the parameters of one TF1 to another.
2198 
2199  Parameters
2200  ----------
2201  tf1_source : ROOT.TF1
2202  Take parameters from here
2203  tf1_target : ROOT.TF1
2204  Copy them to here.
2205  offset : int, optional
2206  Index of the first target parameter to which to copy.
2207  """
2208  n_parameters = tf1_source.GetNpar()
2209 
2210  # Helper variables for pyROOT's mechanism to call functions by reference
2211  lower_bound = ctypes.c_double()
2212  upper_bound = ctypes.c_double()
2213 
2214  for i_source in range(n_parameters):
2215  parameter_name = tf1_source.GetParName(i_source)
2216  i_target = tf1_target.GetParNumber(parameter_name)
2217 
2218  # Workaround for a ROOT bug
2219  if i_target == -1:
2220  for i_target in range(tf1_target.GetNpar()):
2221  if parameter_name == tf1_target.GetParName(i_target):
2222  break
2223  else:
2224  i_target = -1
2225  continue
2226 
2227  tf1_target.SetParameter(i_target,
2228  tf1_source.GetParameter(i_source))
2229  tf1_target.SetParError(i_target,
2230  tf1_source.GetParError(i_source))
2231 
2232  tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2233  tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2234 
2236  """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2237  # Forward the attributes to the plot by auto assignment
2238 
2239  self.checkcheckcheckcheck = self.checkcheckcheckcheck
2240 
2242 
2244 
2245 
2246  self.xlabelxlabelxlabelxlabel = self.xlabelxlabelxlabelxlabel
2247 
2248  self.ylabelylabelylabelylabel = self.ylabelylabelylabelylabel
2249 
2250  self.titletitletitletitle = self.titletitletitletitle
2251 
2252  def set_maximum(self, maximum):
2253  """Sets the maximum of the vertical plotable range"""
2254  for histogram in self.histogramshistograms:
2255  if isinstance(histogram, ROOT.TH1):
2256  histogram.SetMaximum(histogram.GetMaximum(maximum))
2257  else:
2258  histogram.SetMaximum(maximum)
2259 
2260  def set_minimum(self, minimum):
2261  """Sets the minimum of the vertical plotable range"""
2262  for histogram in self.histogramshistograms:
2263  if isinstance(histogram, ROOT.TH1):
2264  histogram.SetMinimum(histogram.GetMinimum(minimum))
2265  else:
2266  histogram.SetMinimum(minimum)
2267 
2268  @classmethod
2269  def set_tstyle(cls):
2270  """Set the style such that the additional stats entries are shown by the TBrowser"""
2271  belle2_validation_style_name = "belle2_validation_style"
2272  belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2273  if not belle2_validation_tstyle:
2274  belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2275 
2276  opt_fit = 112
2277  belle2_validation_tstyle.SetOptFit(opt_fit)
2278 
2279  opt_stat = 111111
2280  belle2_validation_tstyle.SetOptStat(opt_stat)
2281  ROOT.gROOT.SetStyle(belle2_validation_style_name)
2282 
2283  # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2284 
2285  else:
2286  belle2_validation_tstyle.cd()
2287 
2288 
2289 def test():
2290  """Simple test methode"""
2291  ValidationPlot.set_tstyle()
2292 
2293  # Test a histogram plot with some nan and inf values
2294  normal_distributed_values = np.random.randn(1000)
2295 
2296  for i in range(10):
2297  normal_distributed_values[i] = np.nan
2298 
2299  for i in range(10, 20):
2300  normal_distributed_values[i] = np.inf
2301 
2302  for i in range(20, 30):
2303  normal_distributed_values[i] = -np.inf
2304 
2305  validation_histogram = ValidationPlot('test_hist')
2306  validation_histogram.hist(normal_distributed_values)
2307  validation_histogram.title = 'A normal distribution'
2308  validation_histogram.xlabel = 'normal'
2309  validation_histogram.ylabel = 'frequency'
2310  validation_histogram.fit_gaus()
2311 
2312  # Test for a cumulated histogram - cumulation from left to right
2313  cumulated_histogram = ValidationPlot('test_cum_hist')
2314  cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2315  cumulated_histogram.title = 'A cumulated normal distribution'
2316  cumulated_histogram.xlabel = 'normal'
2317  cumulated_histogram.ylabel = 'cdf'
2318  cumulated_histogram.show()
2319 
2320  # Test stacked histogram
2321  # Make a random selection of 40%
2322  stackby = np.random.binomial(1.0, 0.40, 1000)
2323  stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2324  stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2325 
2326  # Make a scatterplot with two species.
2327  x = np.random.randn(1000)
2328  y = 3 * np.random.randn(1000)
2329  ones = np.ones_like(x)
2330  angle = np.pi / 2
2331 
2332  x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2333  y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2334 
2335  stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2336  stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2337 
2338  # Make a profile plot with the two species
2339  stacked_validation_profile = ValidationPlot('test_stacked_profile')
2340  stacked_validation_profile.profile(x1, y1, stackby=stackby)
2341 
2342  # Make a two dimensional histogram with two species
2343  stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2344  stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2345 
2346  # Test a profile with a diagonal fit
2347  x = np.linspace(-1, 1, 1000)
2348  y = x.copy()
2349  x[0] = np.nan
2350  diagonal_plot = ValidationPlot('test_diag')
2351  diagonal_plot.profile(x, y, bins=50)
2352  diagonal_plot.fit_line()
2353 
2354  # Test if cumulation works for profile plots
2355  cumulated_profile = ValidationPlot('test_cum_profile')
2356  cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2357 
2358  tfile = ROOT.TFile('test.root', 'RECREATE')
2359 
2360  validation_histogram.write(tfile)
2361 
2362  with root_cd("expert") as tdirectory1:
2363  diagonal_plot.write(tdirectory1)
2364  cumulated_profile.write(tdirectory1)
2365  cumulated_histogram.write(tdirectory1)
2366 
2367  with root_cd("stacked") as tdirectory2:
2368  stacked_validation_histogram.write(tdirectory2)
2369  stacked_validation_scatter.write()
2370  stacked_validation_profile.write()
2371  stacked_validation_hist2d.write()
2372 
2373  tfile.Close()
2374 
2375  tfile = ROOT.TFile('test.root')
2376  tBrowser = ROOT.TBrowser()
2377  tBrowser.BrowseObject(tfile)
2378  input()
2379  tfile.Close()
2380 
2381 
2382 if __name__ == '__main__':
2383  test()
title
cached value of the title for this plot
Definition: plot.py:2250
pvalue_warn
custom levels for pvalue warnings
Definition: plot.py:233
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
Definition: plot.py:252
y_log
Indicator whether the y axes should be displayed as a log scale.
Definition: plot.py:239
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
Definition: plot.py:1087
def create_additional_stats_tf1(cls, histogram)
Definition: plot.py:2111
description
description of the plot
Definition: plot.py:2243
contact
contact information for this plot
Definition: plot.py:2241
upper_bound
upper right corner of the hisogram
Definition: plot.py:540
def write(self, tdirectory=None)
Definition: plot.py:753
def fit_gaus(self, z_score=None)
Definition: plot.py:614
ylabel
default label for the histogram's Y axis
Definition: plot.py:280
int very_sparse_dots_line_style_index
A an index that reference to a dot spacing such that the line is almost invisible for scatter.
Definition: plot.py:182
def is_discrete(xs, max_n_unique=None)
Definition: plot.py:927
_contact
Contact email address for display on the validation page.
Definition: plot.py:212
pvalue_error
custom levels for pvalue errors
Definition: plot.py:236
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:434
def delete_tf1(cls, histogram)
Definition: plot.py:2097
_title
Title of the validation plot.
Definition: plot.py:221
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
Definition: plot.py:990
def description(self, description)
Definition: plot.py:861
def __init__(self, name, referenceFileName=None)
Definition: plot.py:185
def add_stats_entry(cls, histogram, label, value)
Definition: plot.py:1465
_xlabel
X axes label of the validation plot.
Definition: plot.py:215
xlabel
cached value of the x-axis label for this plot
Definition: plot.py:2246
def set_tf1(cls, histogram, tf1)
Definition: plot.py:2081
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1247
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
Definition: plot.py:2152
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
Definition: plot.py:1051
def set_maximum(self, maximum)
Definition: plot.py:2252
def add_nan_inf_stats(cls, histogram, name, xs)
Definition: plot.py:1439
def set_color(self, tobject, root_i_color)
Definition: plot.py:1228
def cumulate(cls, histogram, cumulation_direction=None)
Definition: plot.py:1571
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
Definition: plot.py:2196
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True)
Definition: plot.py:299
_is_expert
per default all plots are expert and must be set to non-expert explicitly
Definition: plot.py:230
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
Definition: plot.py:681
_description
Description of the plot purpose for display on the validation page.
Definition: plot.py:206
histograms
A list of the histograms that make up the plot.
Definition: plot.py:227
def get_additional_stats(cls, histogram)
Definition: plot.py:1482
_check
Detailed check instructions for display on the validation page.
Definition: plot.py:209
name
A unique name to be used as the name of the ROOT object to be generated.
Definition: plot.py:200
def set_fit_tf1(cls, histogram, fit_tf1)
Definition: plot.py:2067
def set_minimum(self, minimum)
Definition: plot.py:2260
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
Definition: plot.py:1196
plot
The main plot object, may contain one or more (in case of stacked pltos) histograms.
Definition: plot.py:224
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1934
def set_additional_stats_tf1(cls, histogram)
Definition: plot.py:2054
def scatter(self, xs, ys, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:375
_ylabel
Y axes label of the validation plot.
Definition: plot.py:218
referenceFileName
name of the reference file, if not None the binning will be read from there
Definition: plot.py:203
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
Definition: plot.py:1261
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
Definition: plot.py:498
lower_bound
lower left corner of the histogram
Definition: plot.py:538
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1836
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
Definition: plot.py:1677
def gaus_slice_fit(cls, th2, name, z_score=None)
Definition: plot.py:1505
check
cached value of the user-check action for this plot
Definition: plot.py:2239
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1356
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
Definition: plot.py:1119
def fill_into_tgraph(self, graph, xs, ys, filter=None)
Definition: plot.py:1272
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:85