Belle II Software prerelease-10-00-00a
plot.py
1#!/usr/bin/env python3
2
3
10
11import sys
12import math
13import collections
14import array
15import numpy as np
16import ctypes
17
18import ROOT
19
20import basf2
21
22from tracking.root_utils import root_cd, root_save_name
23
24from tracking.validation import statistics
25
26import logging
27# @cond internal_test
28
29
30def get_logger():
31 """Get for the logging.Logger instance of this module
32
33 Returns
34 -------
35 logging.Logger
36 Logger instance of this module
37 """
38 return logging.getLogger(__name__)
39
40
41
42units_by_quantity_name = {
43 'x': 'cm',
44 'y': 'cm',
45 'z': 'cm',
46 'E': 'GeV',
47 'px': 'GeV',
48 'p_{x}': 'GeV',
49 'py': 'GeV',
50 'p_{y}': 'GeV',
51 'pz': 'GeV',
52 'p_{z}': 'GeV',
53 'pt': 'GeV',
54 'p_{t}': 'GeV',
55 'd0': 'cm',
56 'd_{0}': 'cm',
57 'phi0': None,
58 '#phi_{0}': None,
59 'omega': '1/cm',
60 '#omega': '1/cm',
61 'z0': 'cm',
62 'z_{0}': 'cm',
63 'tan_lambda': None,
64 'tan #lambda': None}
65
66
67def get_unit(quantity_name):
68 """Infers the unit of a quantity from its name.
69
70 Assumes the standard Belle II unit system.
71
72 Currently looks up the quantity string from units_by_quantity_name.
73
74 Parameters
75 ----------
76 quantity_name : str
77 Name of a quantity (E.g. pt, x, ...)
78
79 Returns
80 -------
81 str
82 """
83
84 unit = units_by_quantity_name.get(quantity_name, None)
85 return unit
86
87
88def compose_axis_label(quantity_name, unit=None):
89 """Formats a quantity name and a unit to a label for a plot axes.
90
91 If the unit is not given to is tried to infer it
92 from the quantity name by the get_unit function.
93
94 Parameters
95 ----------
96 quantity_name : str
97 Name of the quantity to be displayed at the axes
98 unit : str, optional
99 The unit of the quantity. Defaults to get_unit(quantity_name)
100
101 Returns
102 -------
103 str
104 """
105
106 if unit is None:
107 unit = get_unit(quantity_name)
108
109 if unit is None:
110 axis_label = quantity_name
111 else:
112 axis_label = f'{quantity_name} ({unit})'
113
114 return axis_label
115
116
117def get1DBinningFromReference(name, refFileName):
118 """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
119
120 @param name : name of the TH1 object to be looked for in the file
121 @param refFileName : name of the reference file where the object is searched for
122
123 @return int nbin, float xmin, float xmax of the TH1
124 """
125
126 nbins = None
127 x_min = None
128 x_max = None
129
130 if refFileName is None or refFileName == "":
131 return nbins, x_min, x_max
132
133 # store current directory to not confuse directories by opening a TFile
134 oldDirectory = ROOT.gROOT.CurrentDirectory().load()
135
136 tfile = ROOT.TFile(refFileName)
137 if tfile.IsOpen():
138 objptr = tfile.Get(name)
139 if objptr and objptr.InheritsFrom("TH1"):
140 nbins = objptr.GetNbinsX()
141 x_min = objptr.GetXaxis().GetXmin()
142 x_max = objptr.GetXaxis().GetXmax()
143 else:
144 basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
145 else:
146 basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
147
148 tfile.Close()
149
150 # set current directory back to original one
151 oldDirectory.cd()
152
153 return nbins, x_min, x_max
154
155
156
157StatsEntry = ROOT.TParameter(float)
158
159
160class ValidationPlot:
161
162 """Class for generating a validation plot for the Belle II validation page.
163
164 Typically it generates plots from values stored in numpy arrays and feeds them into
165 plot ROOT classes for storing them.
166
167 It implements an automatic binning procedure based on the rice rule and
168 robust z score outlier detection.
169
170 It also keeps track of additional statistics typically neglected by ROOT such as a count
171 for the non finit values such as NaN, +Inf, -Inf.
172
173 The special attributes for the Belle II validation page like
174 * title
175 * contract
176 * description
177 * check
178 are exposed as properties of this class.
179 """
180
181
182 very_sparse_dots_line_style_index = 28
183 # This line style is defined in the set_tstyle method below.
184
185 def __init__(self, name, referenceFileName=None):
186 """Constructor of the ValidationPlot
187
188 Parameters
189 ----------
190 name : str
191 A unique name to be used as the name of the ROOT object to be generated
192
193 referenceFileName : str
194 name of a reference file. If set the code will try to get the histogram or profile
195 from that file and determine the number of bins and upper and lower bound
196 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
197 """
198
199
200 self.name = root_save_name(name)
201
202
203 self.referenceFileName = referenceFileName
204
205
206 self._description = ''
207
208
209 self._check = ''
210
211
212 self._contact = ''
213
214
215 self._xlabel = ''
216
217
218 self._ylabel = ''
219
220
221 self._title = ''
222
223
224 self.plot = None
225
226
227 self.histograms = []
228
229
230 self._is_expert = True
231
232
233 self.pvalue_warn = None
234
235
236 self.pvalue_error = None
237
238
239 self.y_log = False
240
241 def hist(self,
242 xs,
243 weights=None,
244 stackby=None,
245 bins=None,
246 lower_bound=None,
247 upper_bound=None,
248 outlier_z_score=None,
249 include_exceptionals=True,
250 allow_discrete=False,
251 cumulation_direction=None,
252 is_expert=True):
253 """Fill the plot with a one dimensional histogram."""
254
255 # if referenceFileName was set the binning will taken from there
256 if self.referenceFileName is not None:
257 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
258 if n is not None and xmin is not None and xmax is not None:
259 bins = n
260 upper_bound = xmax
261 lower_bound = xmin
262
263 th1_factory = ROOT.TH1D
264 self._is_expert = is_expert
265
266 self.create_1d(th1_factory,
267 xs,
268 weights=weights,
269 stackby=stackby,
270 bins=bins,
271 lower_bound=lower_bound,
272 upper_bound=upper_bound,
273 outlier_z_score=outlier_z_score,
274 include_exceptionals=include_exceptionals,
275 allow_discrete=allow_discrete,
276 cumulation_direction=cumulation_direction)
277
278 if not self.ylabel:
279
280 self.ylabel = 'count'
281
282 return self
283
284 def profile(self,
285 xs,
286 ys,
287 weights=None,
288 stackby=None,
289 bins=None,
290 lower_bound=None,
291 upper_bound=None,
292 y_binary=None,
293 y_log=None,
294 outlier_z_score=None,
295 include_exceptionals=True,
296 allow_discrete=False,
297 cumulation_direction=None,
298 gaus_z_score=None,
299 is_expert=True,
300 is_asymmetry=False):
301 """Fill the plot with a one dimensional profile of one variable over another."""
302
303 # if referenceFileName was set the binning will taken from there
304 if self.referenceFileName is not None:
305 n = None
306 xmin = None
307 xmax = None
308 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
309 if n is not None and xmin is not None and xmax is not None:
310 bins = n
311 upper_bound = xmax
312 lower_bound = xmin
313
314 th1_factory = ROOT.TProfile
315 self._is_expert = is_expert
316 if gaus_z_score is None:
317 self.create_1d(th1_factory,
318 xs,
319 ys,
320 weights=weights,
321 stackby=stackby,
322 bins=bins,
323 lower_bound=lower_bound,
324 upper_bound=upper_bound,
325 outlier_z_score=outlier_z_score,
326 include_exceptionals=include_exceptionals,
327 allow_discrete=allow_discrete,
328 cumulation_direction=cumulation_direction)
329 else:
330 # Introduce a dummy name for the temporary two dimensional histogram
331 self.name = "_" + self.name
332 self.hist2d(xs, ys=ys, weights=weights, stackby=stackby,
333 bins=(bins, None),
334 lower_bound=(lower_bound, None),
335 upper_bound=(upper_bound, None),
336 outlier_z_score=(outlier_z_score, outlier_z_score),
337 include_exceptionals=(include_exceptionals, True),
338 allow_discrete=(allow_discrete, False),
339 is_expert=is_expert)
340
341 self.name = self.name[1:]
342 profiles = []
343 for histogram in self.histograms:
344 profile = self.gaus_slice_fit(histogram,
345 name=histogram.GetName()[1:],
346 z_score=gaus_z_score)
347 profiles.append(profile)
348 self.histograms = profiles
349 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False)
350
351 if y_log:
352 self.y_log = True
353
354 if y_binary or self.is_binary(ys):
355 if not self.ylabel:
356 self.ylabel = 'probability'
357
358 min_y = 0
359 if (is_asymmetry):
360 min_y = -1.05
361 for histogram in self.histograms:
362 histogram.SetMinimum(min_y)
363 histogram.SetMaximum(1.05)
364
365 self.plot.SetMinimum(min_y)
366 self.plot.SetMaximum(1.05)
367
368 return self
369
370 def scatter(self,
371 xs,
372 ys,
373 stackby=None,
374 lower_bound=(None, None),
375 upper_bound=(None, None),
376 outlier_z_score=(None, None),
377 include_exceptionals=(True, True),
378 max_n_data=100000,
379 is_expert=True):
380 """Fill the plot with a (unbinned) two dimensional scatter plot"""
381
382 self._is_expert = is_expert
383
384 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
385 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
386 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
387 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
388
389 x_lower_bound, x_upper_bound = self.determine_range(
390 xs,
391 lower_bound=x_lower_bound,
392 upper_bound=x_upper_bound,
393 outlier_z_score=x_outlier_z_score,
394 include_exceptionals=x_include_exceptionals
395 )
396
397 y_lower_bound, y_upper_bound = self.determine_range(
398 ys,
399 lower_bound=y_lower_bound,
400 upper_bound=y_upper_bound,
401 outlier_z_score=y_outlier_z_score,
402 include_exceptionals=y_include_exceptionals
403 )
404
405 graph = ROOT.TGraph()
406
407 graph.SetName(self.name)
408 graph.SetMarkerStyle(6)
409 graph.GetHistogram().SetOption("AP") # <-- this has no effect?
410
411 # Trying to make the graph lines invisible
412 color_index = 0 # white
413 # Transparent white
414 graph.SetLineColorAlpha(color_index, 0)
415 graph.SetLineStyle(self.very_sparse_dots_line_style_index)
416
417 # Transport the lower and upper bound as ranges of the axis
418 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
419 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
420
421 self.create(graph,
422 xs=xs,
423 ys=ys,
424 stackby=stackby,
425 reverse_stack=False)
426
427 return self
428
429 def grapherrors(self,
430 xs_and_err,
431 ys_and_err,
432 stackby=None,
433 lower_bound=(None, None),
434 upper_bound=(None, None),
435 outlier_z_score=(None, None),
436 include_exceptionals=(True, True),
437 max_n_data=100000,
438 is_expert=True):
439 """Fill the plot with a (unbinned) two dimensional scatter plot
440 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
441 as numpy arrays
442 """
443
444 self._is_expert = is_expert
445
446 xs = xs_and_err[0]
447 ys = ys_and_err[0]
448
449 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
450 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
451 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
452 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
453
454 x_lower_bound, x_upper_bound = self.determine_range(
455 xs,
456 lower_bound=x_lower_bound,
457 upper_bound=x_upper_bound,
458 outlier_z_score=x_outlier_z_score,
459 include_exceptionals=x_include_exceptionals
460 )
461
462 y_lower_bound, y_upper_bound = self.determine_range(
463 ys,
464 lower_bound=y_lower_bound,
465 upper_bound=y_upper_bound,
466 outlier_z_score=y_outlier_z_score,
467 include_exceptionals=y_include_exceptionals
468 )
469
470 graph = ROOT.TGraphErrors()
471
472 graph.SetName(self.name)
473 graph.GetHistogram().SetOption("A")
474
475 graph.SetMarkerColor(4)
476 graph.SetMarkerStyle(21)
477
478 # Transport the lower and upper bound as ranges of the axis
479 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
480 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
481
482 self.create(graph,
483 xs=xs,
484 ys=ys,
485 stackby=stackby,
486 reverse_stack=False)
487
488 return self
489
490 def hist2d(self,
491 xs,
492 ys,
493 weights=None,
494 stackby=None,
495 bins=(None, None),
496 lower_bound=(None, None),
497 upper_bound=(None, None),
498 outlier_z_score=(None, None),
499 include_exceptionals=(True, True),
500 allow_discrete=(False, False),
501 quantiles=None,
502 is_expert=True):
503 """Fill the plot with a two dimensional histogram"""
504
505 name = self.name
506 # Introduce a dummy name for the temporary two dimensional histogram
507 if quantiles is not None:
508 name = "_" + self.name
509
510 x_bins, y_bins = self.unpack_2d_param(bins)
511 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
512 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
513 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
514 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
515 x_allow_discrete, y_allow_discrete = self.unpack_2d_param(allow_discrete)
516
517 if quantiles is not None:
518 y_include_exceptionals = True
519 y_allow_discrete = False
520
521 x_bin_edges, x_bin_labels = self.determine_bin_edges(xs,
522 stackbys=stackby,
523 bins=x_bins,
524 lower_bound=x_lower_bound,
525 upper_bound=x_upper_bound,
526 outlier_z_score=x_outlier_z_score,
527 include_exceptionals=x_include_exceptionals,
528 allow_discrete=x_allow_discrete)
529
530 y_bin_edges, y_bin_labels = self.determine_bin_edges(ys,
531 stackbys=stackby,
532 bins=y_bins,
533 lower_bound=y_lower_bound,
534 upper_bound=y_upper_bound,
535 outlier_z_score=y_outlier_z_score,
536 include_exceptionals=y_include_exceptionals,
537 allow_discrete=y_allow_discrete)
538
539 n_x_bins = len(x_bin_edges) - 1
540 n_y_bins = len(y_bin_edges) - 1
541
542 self.lower_bound = (x_bin_edges[0], y_bin_edges[0])
543
544 self.upper_bound = (x_bin_edges[-1], y_bin_edges[-1])
545
546 histogram = ROOT.TH2D(name,
547 '',
548 n_x_bins,
549 x_bin_edges,
550 n_y_bins,
551 y_bin_edges)
552
553 if x_bin_labels:
554 get_logger().info("Scatter plot %s is discrete in x.", name)
555 x_taxis = histogram.GetXaxis()
556 for i_x_bin, x_bin_label in enumerate(x_bin_labels):
557 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
558 self.add_stats_entry(histogram, "dx", 1)
559
560 else:
561 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
562 self.add_stats_entry(histogram, "dx", x_bin_width)
563
564 if y_bin_labels:
565 get_logger().info("Scatter plot %s is discrete in y.", name)
566 y_taxis = histogram.GetYaxis()
567 for i_y_bin, y_bin_label in enumerate(y_bin_labels):
568 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
569 self.add_stats_entry(histogram, "dy", 1)
570
571 else:
572 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
573 self.add_stats_entry(histogram, "dy", y_bin_width)
574
575 self.create(histogram, xs, ys=ys, weights=weights, stackby=stackby)
576
577 if quantiles is not None:
578 self.name = self.name[1:]
579 profiles = []
580 for histogram in self.histograms:
581 for quantile in quantiles:
582 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
583
584 # Manually copy labels grumble grumble
585 x_taxis = histogram.GetXaxis()
586 new_x_taxis = profile.GetXaxis()
587 for i_bin in range(x_taxis.GetNbins() + 2):
588 label = x_taxis.GetBinLabel(i_bin)
589 if label != "":
590 new_x_taxis.SetBinLabel(i_bin, label)
591
592 # Remove faulty error values)
593 epsilon = sys.float_info.epsilon
594 for i_bin in range(0, profile.GetNbinsX() + 2):
595 profile.SetBinError(i_bin, epsilon)
596
597 profiles.append(profile)
598
599 self.histograms = profiles
600 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False, force_graph=True)
601
602 # Adjust the discrete bins after the filling to be equidistant
603 if x_bin_labels:
604 for histogram in self.histograms:
605 x_taxis = histogram.GetXaxis()
606 x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
607 x_taxis.Set(n_x_bins, x_bin_edges)
608
609 # Adjust the discrete bins after the filling to be equidistant
610 if y_bin_labels:
611 for histogram in self.histograms:
612 x_taxis = histogram.GetXaxis()
613 y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
614 y_taxis.Set(n_y_bins, y_bin_edges)
615
616 return self
617
618 def fit_gaus(self, z_score=None):
619 """Fit a Gaus bell curve to the central portion of a one dimensional histogram
620
621 The fit is applied to the central mean +- z_score * std interval of the histogram,
622 such that it is less influence by non gaussian tails further away than the given z score.
623
624 @param float z_score number of sigmas to include from the mean value of the histogram.
625 """
626 title = "gaus"
627 formula = "gaus"
628
629 plot = self.plot
630 if plot is None:
631 raise RuntimeError('Validation plot must be filled before it can be fitted.')
632
633 if not isinstance(plot, ROOT.TH1D):
634 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
635
636 histogram = plot
637
638 fit_tf1 = ROOT.TF1("Fit", formula)
639 fit_tf1.SetTitle(title)
640 fit_tf1.SetParName(0, "n")
641 fit_tf1.SetParName(1, "mean")
642 fit_tf1.SetParName(2, "std")
643
644 n = histogram.GetSumOfWeights()
645 mean = histogram.GetMean()
646 std = histogram.GetStdDev()
647
648 fit_tf1.SetParameter(0, n)
649 fit_tf1.SetParameter(1, mean)
650 fit_tf1.SetParameter(2, std)
651
652 fit_options = "LM"
653 return self.fit(fit_tf1,
654 fit_options,
655 z_score=z_score)
656
657 def fit_line(self):
658 """Fit a general line to a one dimensional histogram"""
659 title = "line"
660 formula = "x++1"
661 fit_tf1 = ROOT.TF1("Fit", formula)
662 fit_tf1.SetTitle(title)
663 fit_tf1.SetParName(0, "slope")
664 fit_tf1.SetParName(1, "intercept")
665 self.fit(fit_tf1, 'M')
666
667 def fit_const(self):
668 """Fit a constant function to a one dimensional histogram"""
669 title = "const"
670 formula = "[0]"
671 fit_tf1 = ROOT.TF1("Fit", formula)
672 fit_tf1.SetTitle(title)
673 fit_tf1.SetParName(0, "intercept")
674 self.fit(fit_tf1, 'M')
675
676 def fit_diag(self):
677 """Fit a diagonal line through the origin to a one dimensional histogram"""
678 title = "diag"
679 formula = "[0]*x"
680 fit_tf1 = ROOT.TF1("Fit", formula)
681 fit_tf1.SetTitle(title)
682 fit_tf1.SetParName(0, "slope")
683 self.fit(fit_tf1, 'M')
684
685 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
686 """Fit a user defined function to a one dimensional histogram
687
688 Parameters
689 ----------
690 formula : str or TF1
691 Formula string or TH1 to be fitted. See TF1 constructors for that is a valid formula
692 options : str
693 Options string to be used in the fit. See TH1::Fit()
694 lower_bound : float
695 Lower bound of the range to be fitted
696 upper_bound : float
697 Upper bound of the range to be fitted
698 """
699 plot = self.plot
700 if plot is None:
701 raise RuntimeError('Validation plot must be filled before it can be fitted.')
702
703 if not isinstance(plot, ROOT.TH1D):
704 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
705
706 histogram = plot
707
708 xaxis = histogram.GetXaxis()
709 n_bins = xaxis.GetNbins()
710 hist_lower_bound = xaxis.GetBinLowEdge(1)
711 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
712
713 if z_score is not None:
714 mean = histogram.GetMean()
715 std = histogram.GetStdDev()
716
717 if lower_bound is None:
718 lower_bound = mean - z_score * std
719
720 if upper_bound is None:
721 upper_bound = mean + z_score * std
722
723 # Setup the plotting range of the function to match the histogram
724 if isinstance(formula, ROOT.TF1):
725 fit_tf1 = formula
726 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
727 else:
728 fit_tf1 = ROOT.TF1("Fit",
729 formula,
730 hist_lower_bound,
731 hist_upper_bound)
732 get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
733
734 # Determine fit range
735 if lower_bound is None or lower_bound < hist_lower_bound:
736 lower_bound = hist_lower_bound
737 if upper_bound is None or upper_bound > hist_upper_bound:
738 upper_bound = hist_upper_bound
739
740 # Make sure the fitted function is not automatically added since we want to do that one our own.
741 # Look for the documentation of TH1::Fit() for details of the options.
742 if 'N' not in options:
743 options += 'N'
744
745 fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
746
747 self.set_fit_tf1(histogram, fit_tf1)
748 return fit_res
749
750 def show(self):
751 """Show the plot."""
752 if self.plot:
753 self.plot.Draw()
754 else:
755 raise ValueError("Can not show a validation plot that has not been filled.")
756
757 def write(self, tdirectory=None):
758 """Write the plot to file
759
760 Parameters
761 ----------
762 tdirectory : ROOT.TDirectory, optional
763 ROOT directory to which the plot should be written.
764 If omitted write to the current directory
765 """
766 if not self.plot:
767 raise ValueError("Can not write a validation plot that has not been filled.")
768
769 with root_cd(tdirectory):
770 ValidationPlot.set_tstyle()
771 if self.plot not in self.histograms:
772 self.plot.Write()
773
774 # always disable ROOT's stat plot because it hides items
775 meta_options = ["nostats"]
776
777 # add expert option, if requested
778 if self.is_expert:
779 meta_options.append("expert")
780 else:
781 meta_options.append("shifter")
782
783 # give a custom pvalue warning / error zone if requested
784 if self.pvalue_error is not None:
785 meta_options.append(f"pvalue-error={self.pvalue_error}")
786 if self.pvalue_warn is not None:
787 meta_options.append(f"pvalue-warn={self.pvalue_warn}")
788
789 # Indicator if the y axes should be displayed as a logarithmic scale
790 if self.y_log:
791 meta_options.append("logy")
792
793 meta_options_str = ",".join(meta_options)
794
795 for histogram in self.histograms:
796 histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
797 histogram.Write()
798
799 @property
800 def is_expert(self):
801 """Getter method if an plot plot is marked as expert plot"""
802 return self._is_expert
803
804 @property
805 def title(self):
806 """Getter for the plot title"""
807 return self._title
808
809 @title.setter
810 def title(self, title):
811 """Setter for the plot title"""
812 self._title = title
813 if self.plot:
814 self.plot.SetTitle(title)
815 for histogram in self.histograms:
816 histogram.SetTitle(title)
817
818 @property
819 def xlabel(self):
820 """Getter for the axis label at the x axis"""
821 return self._xlabel
822
823 @xlabel.setter
824 def xlabel(self, xlabel):
825 """Setter for the axis label at the x axis"""
826 self._xlabel = xlabel
827 for histogram in self.histograms:
828 histogram.GetXaxis().SetTitle(xlabel)
829
830 @property
831 def ylabel(self):
832 """Getter for the axis label at the y axis"""
833 return self._ylabel
834
835 @ylabel.setter
836 def ylabel(self, ylabel):
837 """Setter for the axis label at the y axis"""
838 self._ylabel = ylabel
839 for histogram in self.histograms:
840 histogram.GetYaxis().SetTitle(ylabel)
841
842 @property
843 def contact(self):
844 """Getter for the contact email address to be displayed on the validation page"""
845 return self._contact
846
847 @contact.setter
848 def contact(self, contact):
849 """Setter for the contact email address to be displayed on the validation page"""
850 self._contact = contact
851 for histogram in self.histograms:
852 found_obj = histogram.FindObject('Contact')
853 if not found_obj:
854 tnamed = ROOT.TNamed("Contact", contact)
855 histogram.GetListOfFunctions().Add(tnamed)
856 found_obj = histogram.FindObject('Contact')
857 found_obj.SetTitle(contact)
858
859 @property
860 def description(self):
861 """Getter for the description to be displayed on the validation page"""
862 return self._description
863
864 @description.setter
865 def description(self, description):
866 """Setter for the description to be displayed on the validation page"""
867 self._description = description
868 for histogram in self.histograms:
869 found_obj = histogram.FindObject('Description')
870 if not found_obj:
871 tnamed = ROOT.TNamed("Description", description)
872 histogram.GetListOfFunctions().Add(tnamed)
873 found_obj = histogram.FindObject('Description')
874 found_obj.SetTitle(description)
875
876 @property
877 def check(self):
878 """Getter for the check to be displayed on the validation page"""
879 return self._check
880
881 @check.setter
882 def check(self, check):
883 """Setter for the check to be displayed on the validation page"""
884 self._check = check
885 for histogram in self.histograms:
886 found_obj = histogram.FindObject('Check')
887 if not found_obj:
888 tnamed = ROOT.TNamed("Check", check)
889 histogram.GetListOfFunctions().Add(tnamed)
890 found_obj = histogram.FindObject('Check')
891 found_obj.SetTitle(check)
892
893 # Implementation details #
894 # ###################### #
895
896 @staticmethod
897 def unpack_2d_param(param):
898 """Unpacks a function parameter for the two dimensional plots.
899
900 If it is a pair the first parameter shall apply to the x coordinate
901 the second to the y coordinate. In this case the pair is returned as two values
902
903 If something else is given the it is assumed that this parameter should equally apply
904 to both coordinates. In this case the same values is return twice as a pair.
905
906 Parameters
907 ----------
908 param : pair or single value
909 Function parameter for a two dimensional plot
910
911 Returns
912 -------
913 pair
914 A pair of values being the parameter for the x coordinate and
915 the y coordinate respectively
916 """
917 try:
918 if len(param) == 2:
919 x_param, y_param = param
920 except TypeError:
921 x_param = param
922 y_param = param
923 return x_param, y_param
924
925 @staticmethod
926 def is_binary(xs):
927 """Determine if the data consists of boolean values"""
928 return statistics.is_binary_series(xs)
929
930 @staticmethod
931 def is_discrete(xs, max_n_unique=None):
932 """Determine if the data consists of discrete values"""
933 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
934
935 @staticmethod
936 def get_exceptional_values(xs):
937 """Find exceptionally frequent values
938
939 Parameters
940 ----------
941 xs : np.array (1d)
942 Data series
943
944 Returns
945 -------
946 np.array (1d)
947 A list of the found exceptional values.
948 """
949 return statistics.rice_exceptional_values(xs)
950
951 @staticmethod
952 def get_robust_mean_and_std(xs):
953 """Does an estimation of mean and standard deviation robust against outliers.
954
955 Parameters
956 ----------
957 xs : np.array (1d)
958 Data series
959
960 Returns
961 -------
962 float, float
963 Pair of mean and standard deviation
964 """
965 x_mean = statistics.truncated_mean(xs)
966 x_std = statistics.trimmed_std(xs)
967 return x_mean, x_std
968
969 @staticmethod
970 def format_bin_label(value):
971 """Formats a value to be placed at a tick on an axis."""
972 if np.isfinite(value) and value == np.round(value):
973 return str(int(value))
974 else:
975 formated_value = f"{value:.5g}"
976
977 # if the label is to long, switch to shorter "e" format
978 if len(formated_value) > 8:
979 formated_value = f"{value:.3e}"
980 return formated_value
981
982 def create_1d(self,
983 th1_factory,
984 xs,
985 ys=None,
986 weights=None,
987 bins=None,
988 stackby=None,
989 lower_bound=None,
990 upper_bound=None,
991 outlier_z_score=None,
992 include_exceptionals=True,
993 allow_discrete=False,
994 cumulation_direction=None):
995 """Combined factory method for creating a one dimensional histogram or a profile plot."""
996 name = self.name
997
998 # Coerce values to a numpy array. Do not copy if already a numpy array.
999 xs = np.array(xs, copy=False)
1000
1001 if ys is not None:
1002 ys = np.array(ys, copy=False)
1003
1004 if weights is not None:
1005 weights = np.array(weights, copy=False)
1006
1007 bin_edges, bin_labels = self.determine_bin_edges(xs,
1008 stackbys=stackby,
1009 bins=bins,
1010 lower_bound=lower_bound,
1011 upper_bound=upper_bound,
1012 outlier_z_score=outlier_z_score,
1013 include_exceptionals=include_exceptionals,
1014 allow_discrete=allow_discrete)
1015
1016 n_bins = len(bin_edges) - 1
1017 self.lower_bound = bin_edges[0]
1018 self.upper_bound = bin_edges[-1]
1019 histogram = th1_factory(name, '', n_bins, bin_edges)
1020
1021 if bin_labels:
1022 get_logger().info("One dimensional plot %s is discrete in x.", name)
1023 x_taxis = histogram.GetXaxis()
1024 for i_bin, bin_label in enumerate(bin_labels):
1025 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1026 self.add_stats_entry(histogram, "dx", 1)
1027
1028 else:
1029 bin_width = bin_edges[1] - bin_edges[0]
1030 self.add_stats_entry(histogram, "dx", bin_width)
1031
1032 self.create(histogram,
1033 xs,
1034 ys=ys,
1035 weights=weights,
1036 stackby=stackby,
1037 cumulation_direction=cumulation_direction,
1038 reverse_stack=True)
1039 # Reverse the stack to have the signal distribution at the bottom
1040
1041 # Adjust the discrete bins after the filling to be equidistant
1042 if bin_labels:
1043 for histogram in self.histograms:
1044 x_taxis = histogram.GetXaxis()
1045 bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1046 x_taxis.Set(n_bins, bin_edges)
1047
1048 def create(self,
1049 histogram_template,
1050 xs,
1051 ys=None,
1052 weights=None,
1053 stackby=None,
1054 cumulation_direction=None,
1055 reverse_stack=None):
1056 """Create histograms from a template, possibly stacked"""
1057
1058 histograms = []
1059
1060 if stackby is None:
1061 histogram = histogram_template
1062 self.fill_into(histogram, xs, ys, weights=weights)
1063 if cumulation_direction is not None:
1064 histogram = self.cumulate(histogram, cumulation_direction=cumulation_direction)
1065
1066 histograms.append(histogram)
1067 plot = histogram
1068
1069 else:
1070 stackby = np.array(stackby, copy=False)
1071 name = histogram_template.GetName()
1072
1073 histograms = self.fill_into_grouped(histogram_template,
1074 xs,
1075 ys=ys,
1076 weights=weights,
1077 groupbys=stackby,
1078 groupby_label="stack")
1079
1080 if cumulation_direction is not None:
1081 histograms = [self.cumulate(histogram, cumulation_direction=cumulation_direction)
1082 for histogram in histograms]
1083
1084 plot = self.create_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1085
1086 self.histograms = histograms
1087 self.plot = plot
1088 self.attach_attributes()
1089
1090 @classmethod
1091 def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1092 """Create a stack of histograms"""
1093 if len(histograms) == 1:
1094 plot = histograms[0]
1095 else:
1096 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1097 plot = ROOT.TMultiGraph()
1098 else:
1099 plot = ROOT.THStack()
1100
1101 plot.SetName(name)
1102
1103 # Add the histogram in reverse order such
1104 # that the signal usually is on the bottom an well visible
1105 if reverse_stack:
1106 for histogram in reversed(histograms):
1107 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1108 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1109 plot.Add(histogram, "APZ")
1110 else:
1111 plot.Add(histogram)
1112 else:
1113 for histogram in histograms:
1114 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1115 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1116 plot.Add(histogram, "APZ")
1117 else:
1118 plot.Add(histogram)
1119
1120 return plot
1121
1122 @classmethod
1123 def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1124 """Extract errors from a TProfile histogram and create a TGraph from these"""
1125 if isinstance(tprofile, ROOT.TGraph):
1126 return tprofile
1127
1128 x_taxis = tprofile.GetXaxis()
1129 n_bins = x_taxis.GetNbins()
1130
1131 # bin_ids_with_underflow = list(range(n_bins + 1))
1132 bin_ids_without_underflow = list(range(1, n_bins + 1))
1133
1134 bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1135 if abs_x:
1136 bin_centers = np.abs(bin_centers)
1137 bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1138 bin_x_errors = bin_widths / 2.0
1139
1140 # Now for the histogram content not including the underflow
1141 bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1142 bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1143
1144 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1145
1146 tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1147
1148 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1149 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1150
1151 # Copy all functions and stats entries
1152 for tobject in tprofile.GetListOfFunctions():
1153 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1154
1155 # Add stats entries that are displayed for profile plots
1156 cls.add_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1157
1158 stats_values = np.array([np.nan] * 6)
1159 tprofile.GetStats(stats_values)
1160
1161 sum_w = stats_values[0]
1162 # sum_w2 = stats_values[1]
1163 sum_wx = stats_values[2]
1164 sum_wx2 = stats_values[3]
1165 sum_wy = stats_values[4]
1166 sum_wy2 = stats_values[5]
1167
1168 cls.add_stats_entry(tgrapherrors,
1169 "x avg",
1170 sum_wx / sum_w)
1171
1172 cls.add_stats_entry(tgrapherrors,
1173 "x std",
1174 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1175
1176 cls.add_stats_entry(tgrapherrors,
1177 "y avg",
1178 sum_wy / sum_w)
1179
1180 cls.add_stats_entry(tgrapherrors,
1181 "y std",
1182 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1183
1184 cls.add_stats_entry(tgrapherrors,
1185 'cov',
1186 tgrapherrors.GetCovariance())
1187
1188 cls.add_stats_entry(tgrapherrors,
1189 'corr',
1190 tgrapherrors.GetCorrelationFactor())
1191
1192 return tgrapherrors
1193
1194 def fill_into_grouped(self,
1195 histogram_template,
1196 xs,
1197 ys=None,
1198 weights=None,
1199 groupbys=None,
1200 groupby_label="group"):
1201 """Fill data into similar histograms in groups indicated by a groupby array"""
1202
1203 histograms = []
1204 unique_groupbys = np.unique(groupbys)
1205 name = histogram_template.GetName()
1206
1207 for i_value, value in enumerate(unique_groupbys):
1208 if np.isnan(value):
1209 indices_for_value = np.isnan(groupbys)
1210 else:
1211 indices_for_value = groupbys == value
1212
1213 # Make a copy of the empty histogram
1214 histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1215 i_root_color = i_value + 1
1216
1217 self.set_color(histogram_for_value, i_root_color)
1218
1219 if groupby_label:
1220 self.add_stats_entry(histogram_for_value, groupby_label, value)
1221
1222 self.fill_into(histogram_for_value,
1223 xs,
1224 ys=ys,
1225 weights=weights,
1226 filter=indices_for_value)
1227
1228 histograms.append(histogram_for_value)
1229
1230 return histograms
1231
1232 def set_color(self, tobject, root_i_color):
1233 """Set the color of the ROOT object.
1234
1235 By default the line color of a TGraph should be invisible, so do not change it
1236 For other objects set the marker and the line color
1237
1238 Parameters
1239 ----------
1240 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1241 Object of which the color should be set.
1242 root_i_color : int
1243 Color index of the ROOT color table
1244 """
1245 if isinstance(tobject, ROOT.TGraph):
1246 tobject.SetMarkerColor(root_i_color)
1247 else:
1248 tobject.SetLineColor(root_i_color)
1249 tobject.SetMarkerColor(root_i_color)
1250
1251 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1252 """Fill the data into the plot object"""
1253 if isinstance(plot, ROOT.TGraph):
1254 if ys is None:
1255 raise ValueError("ys are required for filling a graph")
1256 self.fill_into_tgraph(plot, xs, ys, filter=filter)
1257 elif isinstance(plot, ROOT.TGraphErrors):
1258 if ys is None:
1259 raise ValueError("ys are required for filling a graph error")
1260
1261 self.fill_into_tgrapherror(plot, xs, ys)
1262 else:
1263 self.fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1264
1265 def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1266 """fill point values and error of the x and y axis into the graph"""
1267
1268 assert (len(xs[0]) == len(ys[0]))
1269 # set the overall amount of points
1270 graph.Set(len(xs[0]))
1271
1272 for i in range(len(xs[0])):
1273 graph.SetPoint(i, xs[0][i], ys[0][i])
1274 graph.SetPointError(i, xs[1][i], ys[1][i])
1275
1276 def fill_into_tgraph(self, graph, xs, ys, filter=None):
1277 """Fill the data into a TGraph"""
1278
1279 # Save some ifs by introducing a dummy slicing as a non filter
1280 if filter is None:
1281 filter = slice(None)
1282
1283 xs = xs[filter]
1284 ys = ys[filter]
1285
1286 max_n_data = 100000
1287 x_n_data = len(xs)
1288 y_n_data = len(ys)
1289
1290 if max_n_data:
1291 if x_n_data > max_n_data or y_n_data > max_n_data:
1292 get_logger().warning(f"Number of points in scatter graph {self.name} exceed limit {max_n_data}")
1293
1294 get_logger().warning(f"Cropping {max_n_data}")
1295
1296 xs = xs[0:max_n_data]
1297 ys = ys[0:max_n_data]
1298
1299 x_axis = graph.GetXaxis()
1300 y_axis = graph.GetYaxis()
1301
1302 x_lower_bound = x_axis.GetXmin()
1303 x_upper_bound = x_axis.GetXmax()
1304
1305 y_lower_bound = y_axis.GetXmin()
1306 y_upper_bound = y_axis.GetXmax()
1307
1308 x_underflow_indices = xs < x_lower_bound
1309 x_overflow_indices = xs > x_upper_bound
1310
1311 y_underflow_indices = ys < y_lower_bound
1312 y_overflow_indices = ys > y_upper_bound
1313
1314 plot_indices = ~(np.isnan(xs) |
1315 x_underflow_indices |
1316 x_overflow_indices |
1317 np.isnan(ys) |
1318 y_underflow_indices |
1319 y_overflow_indices)
1320
1321 n_plot_data = np.sum(plot_indices)
1322 plot_xs = xs[plot_indices]
1323 plot_ys = ys[plot_indices]
1324
1325 graph.Set(int(n_plot_data))
1326 for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1327 graph.SetPoint(i, x, y)
1328
1329 self.add_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1330
1331 self.add_nan_inf_stats(graph, 'x', xs)
1332 self.add_nan_inf_stats(graph, 'y', ys)
1333
1334 x_n_underflow = np.sum(x_underflow_indices)
1335 if x_n_underflow:
1336 self.add_stats_entry(graph, 'x underf.', x_n_underflow)
1337
1338 x_n_overflow = np.sum(x_overflow_indices)
1339 if x_n_overflow:
1340 self.add_stats_entry(graph, 'x overf.', x_n_overflow)
1341
1342 y_n_underflow = np.sum(y_underflow_indices)
1343 if y_n_underflow:
1344 self.add_stats_entry(graph, 'y underf.', y_n_underflow)
1345
1346 y_n_overflow = np.sum(y_overflow_indices)
1347 if y_n_overflow:
1348 self.add_stats_entry(graph, 'y overf.', y_n_overflow)
1349
1350 self.add_stats_entry(graph, 'x avg', graph.GetMean(1))
1351 self.add_stats_entry(graph, 'x std', graph.GetRMS(1))
1352
1353 self.add_stats_entry(graph, 'y avg', graph.GetMean(2))
1354 self.add_stats_entry(graph, 'y std', graph.GetRMS(2))
1355
1356 self.add_stats_entry(graph, 'cov', graph.GetCovariance())
1357 self.add_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1358
1359 def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1360 """Fill the histogram blocking non finite values
1361
1362 Parameters
1363 ----------
1364 histogram : ROOT.TH1
1365 The histogram to be filled
1366 xs : numpy.ndarray (1d)
1367 Data for the first axes
1368 ys : numpy.ndarray (1d), optional
1369 Data for the second axes
1370 weights : numpy.ndarray (1d), optional
1371 Weight of the individual points. Defaults to one for each
1372 filter : numpy.ndarray, optional
1373 Boolean index array indicating which entries shall be taken.
1374 """
1375 # Save some ifs by introducing a dummy slicing as a non filter
1376 if filter is None:
1377 filter = slice(None)
1378
1379 xs = xs[filter]
1380 # Count the nan and inf values in x
1381 self.add_nan_inf_stats(histogram, 'x', xs)
1382 finite_filter = np.isfinite(xs)
1383
1384 if ys is not None:
1385 ys = ys[filter]
1386 # Count the nan and inf values in y
1387 self.add_nan_inf_stats(histogram, 'y', ys)
1388 finite_filter &= np.isfinite(ys)
1389
1390 if weights is None:
1391 xs = xs[finite_filter]
1392 weights = np.ones_like(xs)
1393 else:
1394 weights = weights[filter]
1395 self.add_nan_inf_stats(histogram, 'w', weights)
1396 finite_filter &= np.isfinite(weights)
1397 xs = xs[finite_filter]
1398 weights[finite_filter]
1399
1400 if ys is not None:
1401 ys = ys[finite_filter]
1402
1403 # Now fill the actual histogram
1404 try:
1405 histogram.FillN
1406 except AttributeError:
1407 Fill = histogram.Fill
1408 if ys is None:
1409 fill = np.frompyfunc(Fill, 2, 1)
1410 fill(xs.astype(np.float64, copy=False),
1411 weights.astype(np.float64, copy=False))
1412 else:
1413 fill = np.frompyfunc(Fill, 3, 1)
1414 fill(xs.astype(np.float64, copy=False),
1415 ys.astype(np.float64, copy=False),
1416 weights.astype(np.float64, copy=False))
1417 else:
1418 if ys is None:
1419 # Make the array types compatible with the ROOT interface if necessary
1420 xs = xs.astype(np.float64, copy=False)
1421 weights = weights.astype(np.float64, copy=False)
1422 n = len(xs)
1423 if n != 0:
1424 histogram.FillN(n, xs, weights)
1425 else:
1426 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1427
1428 else:
1429 # Make the array types compatible with the ROOT interface if necessary
1430 xs = xs.astype(np.float64, copy=False)
1431 ys = ys.astype(np.float64, copy=False)
1432 weights = weights.astype(np.float64, copy=False)
1433 n = len(xs)
1434 if n != 0:
1435 histogram.FillN(n, xs, ys, weights)
1436 else:
1437 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1438
1439 self.set_additional_stats_tf1(histogram)
1440
1441 @classmethod
1442 def add_nan_inf_stats(cls, histogram, name, xs):
1443 """ Extracts the counts of non finite floats from a series
1444 and adds them as additional statistics to the histogram.
1445
1446 Parameters
1447 ----------
1448 histogram : derived from ROOT.TH1 or ROOT.TGraph
1449 Something having a GetListOfFunctions method that
1450 name : str
1451 A label for the data series to be prefixed to the entries.
1452 xs : numpy.ndarray (1d)
1453 Data from which the non finit floats should be counted.
1454 """
1455 n_nans = np.isnan(xs).sum()
1456 if n_nans > 0:
1457 cls.add_stats_entry(histogram, name + ' nan', n_nans)
1458
1459 n_positive_inf = np.sum(xs == np.inf)
1460 if n_positive_inf > 0:
1461 cls.add_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1462
1463 n_negative_inf = np.sum(xs == -np.inf)
1464 if n_negative_inf > 0:
1465 cls.add_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1466
1467 @classmethod
1468 def add_stats_entry(cls, histogram, label, value):
1469 """Add a new additional statistics to the histogram.
1470
1471 Parameters
1472 ----------
1473 histogram : derived from ROOT.TH1 or ROOT.TGraph
1474 Something having a GetListOfFunctions method that holds the additional statistics
1475 label : str
1476 Label of the statistic
1477 value : float
1478 Value of the statistic
1479 """
1480 stats_entry = StatsEntry(str(label), float(value))
1481 histogram.GetListOfFunctions().Add(stats_entry)
1482 cls.set_additional_stats_tf1(histogram)
1483
1484 @classmethod
1485 def get_additional_stats(cls, histogram):
1486 """Get the additional statistics from the histogram and return them a dict.
1487
1488 Parameters
1489 ----------
1490 histogram : derived from ROOT.TH1 or ROOT.TGraph
1491 Something having a GetListOfFunctions method that holds the additional statistics
1492
1493 Returns
1494 -------
1495 collection.OrderedDict
1496 A map of labels to values for the additional statistics
1497 """
1498 additional_stats = collections.OrderedDict()
1499 for tobject in histogram.GetListOfFunctions():
1500 if isinstance(tobject, StatsEntry):
1501 stats_entry = tobject
1502 label = stats_entry.GetName()
1503 value = stats_entry.GetVal()
1504 additional_stats[label] = value
1505 return additional_stats
1506
1507 @classmethod
1508 def gaus_slice_fit(cls, th2, name, z_score=None):
1509 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1510 # profile = th2.ProfileX(name)
1511
1512 y_taxis = th2.GetYaxis()
1513 th2_lower_bound = y_taxis.GetXmin()
1514 th2_upper_bound = y_taxis.GetXmax()
1515 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1516 n_y_bins = y_taxis.GetNbins()
1517 if z_score:
1518 y_mean = th2.GetMean(2)
1519 y_std = th2.GetStdDev(2)
1520 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1521 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1522 fit_height = fit_upper_bound - fit_lower_bound
1523 # Require all covered bins to be filled
1524 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1525 else:
1526 fit_lower_bound = th2_lower_bound
1527 fit_upper_bound = th2_upper_bound
1528 fit_height = fit_upper_bound - fit_lower_bound
1529 required_n_bins_inslice_filled = n_y_bins / 1.61
1530
1531 # Highest required number of bins should be a third
1532 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1533
1534 fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1535 fit_tf1.SetParName(0, "n")
1536 fit_tf1.SetParName(1, "mean")
1537 fit_tf1.SetParName(2, "std")
1538 i_first_bin = 0
1539 i_last_bin = -1
1540 fit_options = "QNR"
1541 param_fit_th1s = ROOT.TObjArray()
1542 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1543 int(required_n_bins_inslice_filled),
1544 fit_options, param_fit_th1s)
1545
1546 th1_means = param_fit_th1s.At(1)
1547 th1_means.SetName(name)
1548 th1_means.SetTitle(th2.GetTitle())
1549 for label, value in cls.get_additional_stats(th2).items():
1550 cls.add_stats_entry(th1_means, label, value)
1551
1552 # Manually copy labels grumble grumble
1553 x_taxis = th2.GetXaxis()
1554 new_x_taxis = th1_means.GetXaxis()
1555 for i_bin in range(x_taxis.GetNbins() + 2):
1556 label = x_taxis.GetBinLabel(i_bin)
1557 if label != "":
1558 new_x_taxis.SetBinLabel(i_bin, label)
1559
1560 # Adjust plot bound to reflect the fit range.
1561 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1562 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1563 data_height = data_upper_bound - data_lower_bound
1564
1565 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1566 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1567
1568 th1_means.SetMinimum(plot_lower_bound)
1569 th1_means.SetMaximum(plot_upper_bound)
1570
1571 return th1_means
1572
1573 @classmethod
1574 def cumulate(cls, histogram, cumulation_direction=None):
1575 """Cumulates the histogram inplace.
1576
1577 Parameters
1578 ----------
1579 histogram : ROOT.TH1 or ROOT.TProfile
1580 Filled histogram to be cumulated
1581 cumulation_direction : int, optional
1582 Direction is indicated by the sign.
1583 Positive means from left to right, negative means from right to left.
1584 If now cumulation direction is given return the histogram as is.
1585
1586 Returns
1587 -------
1588 ROOT.TH1
1589 Cumulated histogram potentially altered inplace.
1590 """
1591 if not cumulation_direction:
1592 return histogram
1593
1594 cumulate_backward = cumulation_direction < 0
1595 cumulate_forward = not cumulate_backward
1596
1597 if isinstance(histogram, ROOT.TH2):
1598 raise ValueError("Cannot cumulate a two dimensional histogram.")
1599
1600 if isinstance(histogram, ROOT.TH3):
1601 raise ValueError("Cannot cumulate a three dimensional histogram.")
1602
1603 if not isinstance(histogram, ROOT.TH1):
1604 raise ValueError("Can only cumulate a one dimensional histogram.")
1605
1606 if isinstance(histogram, ROOT.TProfile):
1607 tprofile = histogram
1608 # Turn the profile histogram into graph where we can set the new content and errors
1609 tgraph = cls.convert_tprofile_to_tgrapherrors(histogram)
1610 tgraph.SetName(tprofile.GetName())
1611
1612 n_bins = histogram.GetNbinsX()
1613 n_points = n_bins
1614 cumulated_content = 0.0
1615 cumulated_entries = 0
1616 cumulated_std = 0.0
1617
1618 # Always include the overflow bins.
1619 i_bins = list(range(0, n_bins + 2))
1620 if not cumulate_forward:
1621 i_bins = reversed(i_bins)
1622
1623 for i_bin in i_bins:
1624 i_point = i_bin - 1
1625 bin_content = tprofile.GetBinContent(i_bin)
1626 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1627 bin_std = tprofile.GetBinError(i_bin)
1628
1629 if bin_entries != 0:
1630 cumulated_content = (
1631 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1632 (cumulated_entries + bin_entries)
1633 )
1634
1635 cumulated_std = (
1636 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1637 (cumulated_entries + bin_entries)
1638 )
1639
1640 cumulated_entries = cumulated_entries + bin_entries
1641 else:
1642 # empty bin does not add anything to the cumulation
1643 pass
1644
1645 if i_point >= 0 and i_point < n_points:
1646 x = tgraph.GetX()[i_point]
1647 # x = ROOT.Double()
1648 # y = ROOT.Double()
1649 # tgraph.GetPoint(i_point, x, y)
1650 tgraph.SetPoint(i_point, x, cumulated_content)
1651
1652 x_error = tgraph.GetErrorX(i_point)
1653 tgraph.SetPointError(i_point, x_error, cumulated_std)
1654 return tgraph
1655
1656 else:
1657 # Always include the overflow bins.
1658 n_bins = histogram.GetNbinsX()
1659 cumulated_content = 0.0
1660
1661 i_bins = list(range(0, n_bins + 2))
1662 if not cumulate_forward:
1663 i_bins = reversed(i_bins)
1664
1665 for i_bin in i_bins:
1666 bin_content = histogram.GetBinContent(i_bin)
1667 cumulated_content += bin_content
1668 histogram.SetBinContent(i_bin, cumulated_content)
1669
1670 return histogram
1671
1672 def determine_bin_edges(self,
1673 xs,
1674 stackbys=None,
1675 bins=None,
1676 lower_bound=None,
1677 upper_bound=None,
1678 outlier_z_score=None,
1679 include_exceptionals=True,
1680 allow_discrete=False):
1681 """Deducing bin edges from a data series.
1682
1683 Parameters
1684 ----------
1685 xs : numpy.ndarray (1d)
1686 Data point for which a binning should be found.
1687 stackbys : numpy.ndarray (1d)
1688 Categories of the data points to be distinguishable
1689 bins : list(float) or int or None, optional
1690 Preset bin edges or preset number of desired bins.
1691 The default, None, means the bound should be extracted from data.
1692 The rice rule is used the determine the number of bins.
1693 If a list of floats is given return them immediately.
1694 lower_bound : float or None, optional
1695 Preset lower bound of the binning range.
1696 The default, None, means the bound should be extracted from data.
1697 upper_bound : float or None, optional
1698 Preset upper bound of the binning range.
1699 The default, None, means the bound should be extracted from data.
1700 outlier_z_score : float or None, optional
1701 Threshold z-score of outlier detection.
1702 The default, None, means no outlier detection.
1703 include_exceptionals : bool, optional
1704 If the outlier detection is active this switch indicates,
1705 if values detected as exceptionally frequent shall be included
1706 nevertheless into the binning range. Default is True,
1707 which means exceptionally frequent values as included
1708 even if they are detected as outliers.
1709
1710 Returns
1711 -------
1712 np.array (1d), list(str)
1713 Pair of bin edges and labels deduced from the series.
1714 Second element is None if the series is not detected as discrete.
1715 """
1716 debug = get_logger().debug
1717 debug('Determine binning for plot named %s', self.name)
1718
1719 if bins == 'flat':
1720 # Special value for the flat distribution binning
1721 n_bins = None
1722
1723 elif isinstance(bins, collections.abc.Iterable):
1724 # Bins is considered as an array
1725 # Construct a float array forwardable to root.
1726 bin_edges = bins
1727 bin_edges = array.array('d', bin_edges)
1728 bin_labels = None
1729 return bin_edges, bin_labels
1730
1731 # If bins is not an iterable assume it is the number of bins or None
1732 elif bins is None:
1733 n_bins = None
1734 else:
1735 # Check that bins can be coerced to an integer.
1736 n_bins = int(bins)
1737
1738 # Do not allow negative bin numbers
1739 if not n_bins > 0:
1740 message = f'Cannot accept n_bins={bins} as number of bins, because it is not a number greater than 0.'
1741 raise ValueError(message)
1742
1743 # Coerce values to a numpy array. Do not copy if already a numpy array.
1744 xs = np.array(xs, copy=False)
1745
1746 if self.is_binary(xs) or (allow_discrete and self.is_discrete(xs)):
1747 # This covers also the case
1748 debug('Discrete binning values encountered')
1749 finite_xs = xs[np.isfinite(xs)]
1750 unique_xs = np.unique(finite_xs)
1751
1752 # Crop the unique values between the lower and upper bound
1753 if lower_bound is None:
1754 if len(unique_xs) == 0:
1755 if upper_bound is None:
1756 lower_bound = 0
1757 else:
1758 lower_bound = upper_bound - 1
1759 else:
1760 lower_bound = np.min(unique_xs)
1761 else:
1762 unique_xs = unique_xs[unique_xs >= lower_bound]
1763
1764 if upper_bound is None:
1765 if len(unique_xs) == 0:
1766 upper_bound = lower_bound + 1
1767 else:
1768 upper_bound = np.min(unique_xs)
1769 else:
1770 unique_xs = unique_xs[unique_xs <= upper_bound]
1771
1772 if n_bins is None:
1773 n_bins = len(unique_xs) or 1
1774
1775 if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1776 # Construct a float array forwardable to root.
1777 bin_edges = array.array('d', unique_xs)
1778 format_bin_label = self.format_bin_label
1779 bin_labels = [format_bin_label(value) for value in bin_edges]
1780 bin_edges.append(bin_edges[-1] + 1)
1781 return bin_edges, bin_labels
1782
1783 else:
1784 # Ambiguous case what to do in case of a number of requested bins
1785 # that is lower than the number of unique values?
1786
1787 # Continue with an equistant binning for now.
1788 pass
1789
1790 debug('Lower bound %s', lower_bound)
1791 debug('Upper bound %s', upper_bound)
1792 debug('N bins %s', n_bins)
1793
1794 else:
1795 bin_range = self.determine_bin_range(xs,
1796 stackbys=stackbys,
1797 n_bins=n_bins,
1798 lower_bound=lower_bound,
1799 upper_bound=upper_bound,
1800 outlier_z_score=outlier_z_score,
1801 include_exceptionals=include_exceptionals)
1802
1803 n_bins, lower_bound, upper_bound = bin_range
1804
1805 n_bin_edges = n_bins + 1
1806 if lower_bound != upper_bound:
1807 if bins == "flat":
1808 debug("Creating flat distribution binning")
1809 percentiles = np.linspace(0.0, 100.0, n_bin_edges)
1810 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], percentiles))
1811 else:
1812 # Correct the upper bound such that all values are strictly smaller than the upper bound
1813 # Make one step in single precision in the positive direction
1814 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1815
1816 # Reinforce the upper and lower bound to be exact
1817 # Also expand the upper bound by an epsilon
1818 # to prevent the highest value in xs from going in the overflow bin
1819 bin_edges[0] = lower_bound
1820 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1821 debug('Bins %s', bin_edges)
1822
1823 else:
1824 # Fall back if the array contains only one value
1825 bin_edges = [lower_bound, upper_bound + 1]
1826
1827 # Construct a float array forwardable to root.
1828 bin_edges = array.array('d', bin_edges)
1829 debug('Bins %s for %s', bin_edges, self.name)
1830 return bin_edges, None
1831
1832 def determine_bin_range(self,
1833 xs,
1834 stackbys=None,
1835 n_bins=None,
1836 lower_bound=None,
1837 upper_bound=None,
1838 outlier_z_score=None,
1839 include_exceptionals=True):
1840 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1841 estimating the values that are not given.
1842
1843 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1844 The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1845 mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1846 quantile range (IQR).
1847
1848 If additional include_exceptionals is true the method tries to find exceptional values in the series
1849 and always include them in the range if it finds any.
1850 Exceptional values means exact values that appear often in the series for whatever reason.
1851 Possible reasons include
1852 * Integral / default values
1853 * Failed evaluation conditions
1854 * etc.
1855 which should be not cropped away automatically if you are locking on the quality of your data.
1856
1857 Parameters
1858 ----------
1859 xs : numpy.ndarray (1d)
1860 Data point for which a binning should be found.
1861 stackbys : numpy.ndarray (1d)
1862 Categories of the data points to be distinguishable
1863 n_bins : int or None, optional
1864 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1865 The rice rule is used the determine the number of bins.
1866 lower_bound : float or None, optional
1867 Preset lower bound of the binning range.
1868 The default, None, means the bound should be extracted from data.
1869 upper_bound : float or None, optional
1870 Preset upper bound of the binning range.
1871 The default, None, means the bound should be extracted from data.
1872 outlier_z_score : float or None, optional
1873 Threshold z-score of outlier detection.
1874 The default, None, means no outlier detection.
1875 include_exceptionals : bool, optional
1876 If the outlier detection is active this switch indicates,
1877 if values detected as exceptionally frequent shall be included
1878 nevertheless into the binning range. Default is True,
1879 which means exceptionally frequent values as included
1880 even if they are detected as outliers.
1881
1882 Returns
1883 -------
1884 n_bins, lower_bound, upper_bound : int, float, float
1885 A triple of found number of bins, lower bound and upper bound of the binning range.
1886 """
1887
1888 if stackbys is not None:
1889 unique_stackbys = np.unique(stackbys)
1890 stack_ranges = []
1891 for value in unique_stackbys:
1892 if np.isnan(value):
1893 indices_for_value = np.isnan(stackbys)
1894 else:
1895 indices_for_value = stackbys == value
1896
1897 stack_lower_bound, stack_upper_bound = \
1898 self.determine_range(xs[indices_for_value],
1899 lower_bound=lower_bound,
1900 upper_bound=upper_bound,
1901 outlier_z_score=outlier_z_score,
1902 include_exceptionals=include_exceptionals)
1903
1904 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1905
1906 lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1907 upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1908
1909 else:
1910 lower_bound, upper_bound = self.determine_range(xs,
1911 lower_bound=lower_bound,
1912 upper_bound=upper_bound,
1913 outlier_z_score=outlier_z_score,
1914 include_exceptionals=include_exceptionals)
1915
1916 if n_bins is None:
1917 # Assume number of bins according to the rice rule.
1918 # The number of data points should not include outliers.
1919 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1920 rice_n_bins = int(statistics.rice_n_bin(n_data))
1921 n_bins = rice_n_bins
1922
1923 else:
1924 n_bins = int(n_bins)
1925 # Do not allow negative bin numbers
1926 if not n_bins > 0:
1927 message = f'Cannot accept n_bins={n_bins} as number of bins, because it is not a number greater than 0.'
1928 raise ValueError(message)
1929
1930 return n_bins, lower_bound, upper_bound
1931
1932 def determine_range(self,
1933 xs,
1934 lower_bound=None,
1935 upper_bound=None,
1936 outlier_z_score=None,
1937 include_exceptionals=True):
1938 """
1939 Parameters
1940 ----------
1941 xs : numpy.ndarray (1d)
1942 Data point for which a binning should be found.
1943 lower_bound : float or None, optional
1944 Preset lower bound of the binning range.
1945 The default, None, means the bound should be extracted from data.
1946 upper_bound : float or None, optional
1947 Preset upper bound of the binning range.
1948 The default, None, means the bound should be extracted from data.
1949 outlier_z_score : float or None, optional
1950 Threshold z-score of outlier detection.
1951 The default, None, means no outlier detection.
1952 include_exceptionals : bool, optional
1953 If the outlier detection is active this switch indicates,
1954 if values detected as exceptionally frequent shall be included
1955 nevertheless into the binning range. Default is True,
1956 which means exceptionally frequent values as included
1957 even if they are detected as outliers.
1958
1959 Returns
1960 -------
1961 lower_bound, upper_bound : float, float
1962 A pair of found lower bound and upper bound of series.
1963 """
1964 debug = get_logger().debug
1965
1966 finite_xs_indices = np.isfinite(xs)
1967 if np.any(finite_xs_indices):
1968 finite_xs = xs[finite_xs_indices]
1969 else:
1970 finite_xs = xs
1971
1972 make_symmetric = False
1973 exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
1974
1975 # Look for exceptionally frequent values in the series, e.g. integral delta values like -999
1976 if include_exceptionals or exclude_outliers:
1977 exceptional_xs = self.get_exceptional_values(finite_xs)
1978 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1979
1980 # Prepare for the estimation of outliers
1981 if exclude_outliers:
1982 if not np.all(exceptional_indices):
1983 # Exclude exceptional values from the estimation to be unbiased
1984 # even in case exceptional values fall into the central region near the mean
1985 x_mean, x_std = self.get_robust_mean_and_std(finite_xs[~exceptional_indices])
1986 else:
1987 x_mean, x_std = np.nan, np.nan
1988
1989 make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
1990
1991 if include_exceptionals and len(exceptional_xs) != 0:
1992 lower_exceptional_x = np.min(exceptional_xs)
1993 upper_exceptional_x = np.max(exceptional_xs)
1994 make_symmetric = False
1995 else:
1996 lower_exceptional_x = np.nan
1997 upper_exceptional_x = np.nan
1998
1999 # Find the lower bound, if it is not given.
2000 if lower_bound is None:
2001 try:
2002 lower_bound = np.min(finite_xs)
2003 except ValueError:
2004 lower_bound = -999
2005 # Clip the lower bound by outliers that exceed the given z score
2006 if outlier_z_score is not None:
2007 # The lower bound at which outliers exceed the given z score
2008 lower_outlier_bound = x_mean - outlier_z_score * x_std
2009
2010 # Clip the lower bound such that it concides with an actual value,
2011 # which prevents empty bins from being produced
2012 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2013
2014 if np.any(indices_above_lower_outlier_bound):
2015 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2016
2017 # However we want to include at least the exceptional values in the range if there are any.
2018 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2019
2020 debug('Lower bound after outlier detection')
2021 debug('Lower bound %s', lower_bound)
2022 debug('Lower outlier bound %s', lower_outlier_bound)
2023
2024 # Find the upper bound, if it is not given
2025 if upper_bound is None:
2026 try:
2027 upper_bound = np.max(finite_xs)
2028 except ValueError:
2029 upper_bound = 999
2030 if outlier_z_score is not None:
2031 # The upper bound at which outliers exceed the given z score
2032 upper_outlier_bound = x_mean + outlier_z_score * x_std
2033
2034 # Clip the upper bound such that it concides with an actual value,
2035 # which prevents empty bins from being produced
2036 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2037
2038 if np.any(indices_below_upper_outlier_bound):
2039 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2040
2041 # However we want to include at least the exceptional values in the range if there are any.
2042 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2043
2044 debug('Upper bound after outlier detection')
2045 debug('Upper bound %s', upper_bound)
2046 debug('Upper outlier bound %s', upper_outlier_bound)
2047
2048 if make_symmetric and lower_bound < 0 and upper_bound > 0:
2049 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2050 abs_bound = max(abs(lower_bound), abs(upper_bound))
2051 lower_bound = -abs_bound
2052 upper_bound = abs_bound
2053
2054 return lower_bound, upper_bound
2055
2056 @classmethod
2057 def set_additional_stats_tf1(cls, histogram):
2058 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2059
2060 Parameters
2061 ----------
2062 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2063 Something having a GetListOfFunctions method that should hold
2064 the combined fit and additional statistics function.
2065 """
2066 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2067 cls.set_tf1(histogram, additional_stats_tf1)
2068
2069 @classmethod
2070 def set_fit_tf1(cls, histogram, fit_tf1):
2071 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2072
2073 Parameters
2074 ----------
2075 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2076 Something having a GetListOfFunctions method that should hold
2077 the combined fit and additional statistics function.
2078 """
2079 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2080 combined_tf1 = cls.combine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2081 cls.set_tf1(histogram, combined_tf1)
2082
2083 @classmethod
2084 def set_tf1(cls, histogram, tf1):
2085 """Set the attached TF1 of the histogram.
2086
2087 Parameters
2088 ----------
2089 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2090 Something having a GetListOfFunctions method that should hold
2091 the combined fit and additional statistics function.
2092 """
2093 # Delete any functions formally added
2094 cls.delete_tf1(histogram)
2095 if tf1 is not None:
2096 tf1.SetName("FitAndStats")
2097 histogram.GetListOfFunctions().Add(tf1)
2098
2099 @classmethod
2100 def delete_tf1(cls, histogram):
2101 """Delete the attached TF1 from the histogram
2102
2103 Parameters
2104 ----------
2105 histogram : ROOT.TH1 or ROOT.TGraph
2106 Something having a GetListOfFunctions method that holds the fit function
2107 """
2108 tf1 = histogram.FindObject("FitAndStats")
2109 if tf1:
2110 function_list = histogram.GetListOfFunctions()
2111 function_list.Remove(tf1)
2112
2113 @classmethod
2114 def create_additional_stats_tf1(cls, histogram):
2115 """Create a TF1 with the additional statistics from the histogram as parameters.
2116
2117 Parameters
2118 ----------
2119 histogram : ROOT.TH1 or ROOT.TGraph
2120 Something having a GetListOfFunctions method that holds the additional statistics.
2121
2122 Returns
2123 -------
2124 ROOT.TF1
2125 Function with the additional statistics as parameters.
2126 """
2127
2128 additional_stats = cls.get_additional_stats(histogram)
2129 if not additional_stats:
2130 return None
2131
2132 # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2133 # Dummy range to serve the functions
2134 lower_bound = 0
2135 upper_bound = 0
2136
2137 # Create a formula which is zero in all cases but has space for n parameters
2138 # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2139 formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2140
2141 # Compose a function that carries the additional information
2142 additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2143
2144 for (i, (label, value)) in enumerate(additional_stats.items()):
2145 # root 6 does not like labels with spaces ..
2146 # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2147 # therefore this workaround:
2148 label = label.replace(" ", "-")
2149 additional_stats_tf1.SetParName(i, label)
2150 additional_stats_tf1.FixParameter(i, value)
2151
2152 return additional_stats_tf1
2153
2154 @classmethod
2155 def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2156 """Combine the fit function and the function carrying the additional statistics to one function.
2157
2158 Parameters
2159 ----------
2160 fit_tf1 : ROOT.TF1
2161 The fit function
2162 additional_stats_tf1 : ROOT.TF1
2163 The function carrying the additional statistics as parameters
2164
2165 Returns
2166 -------
2167 ROOT.TF1
2168 """
2169 if additional_stats_tf1 is None:
2170 return fit_tf1
2171
2172 # Combine both TF1 functions
2173 # Get the lower and upper bound of the fit
2174 # Use the pass-by reference containers from pyROOT to be able to call the function
2175 lower_bound = ctypes.c_double()
2176 upper_bound = ctypes.c_double()
2177 fit_tf1.GetRange(lower_bound, upper_bound)
2178 title = fit_tf1.GetTitle()
2179
2180 combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2181 combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound.value, upper_bound.value)
2182 combined_tf1.SetTitle(title)
2183
2184 # Transfer the fitted parameters
2185 chi2 = fit_tf1.GetChisquare()
2186 combined_tf1.SetChisquare(chi2)
2187
2188 ndf = fit_tf1.GetNDF()
2189 combined_tf1.SetNDF(ndf)
2190
2191 n_stats_parameters = additional_stats_tf1.GetNpar()
2192 # n_fit_parameters = fit_tf1.GetNpar()
2193 cls.copy_tf1_parameters(additional_stats_tf1, combined_tf1)
2194 cls.copy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2195
2196 return combined_tf1
2197
2198 @classmethod
2199 def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2200 """Copy the parameters of one TF1 to another.
2201
2202 Parameters
2203 ----------
2204 tf1_source : ROOT.TF1
2205 Take parameters from here
2206 tf1_target : ROOT.TF1
2207 Copy them to here.
2208 offset : int, optional
2209 Index of the first target parameter to which to copy.
2210 """
2211 n_parameters = tf1_source.GetNpar()
2212
2213 # Helper variables for pyROOT's mechanism to call functions by reference
2214 lower_bound = ctypes.c_double()
2215 upper_bound = ctypes.c_double()
2216
2217 for i_source in range(n_parameters):
2218 parameter_name = tf1_source.GetParName(i_source)
2219 i_target = tf1_target.GetParNumber(parameter_name)
2220
2221 # Workaround for a ROOT bug
2222 if i_target == -1:
2223 for i_target in range(tf1_target.GetNpar()):
2224 if parameter_name == tf1_target.GetParName(i_target):
2225 break
2226 else:
2227 i_target = -1
2228 continue
2229
2230 tf1_target.SetParameter(i_target,
2231 tf1_source.GetParameter(i_source))
2232 tf1_target.SetParError(i_target,
2233 tf1_source.GetParError(i_source))
2234
2235 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2236 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2237
2238 def attach_attributes(self):
2239 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2240 # Forward the attributes to the plot by auto assignment
2241
2242 self.check = self.check
2243
2244 self.contact = self.contact
2245
2246 self.description = self.description
2247
2248
2249 self.xlabel = self.xlabel
2250
2251 self.ylabel = self.ylabel
2252
2253 self.title = self.title
2254
2255 def set_maximum(self, maximum):
2256 """Sets the maximum of the vertical plotable range"""
2257 for histogram in self.histograms:
2258 if isinstance(histogram, ROOT.TH1):
2259 histogram.SetMaximum(histogram.GetMaximum(maximum))
2260 else:
2261 histogram.SetMaximum(maximum)
2262
2263 def set_minimum(self, minimum):
2264 """Sets the minimum of the vertical plotable range"""
2265 for histogram in self.histograms:
2266 if isinstance(histogram, ROOT.TH1):
2267 histogram.SetMinimum(histogram.GetMinimum(minimum))
2268 else:
2269 histogram.SetMinimum(minimum)
2270
2271 @classmethod
2272 def set_tstyle(cls):
2273 """Set the style such that the additional stats entries are shown by the TBrowser"""
2274 belle2_validation_style_name = "belle2_validation_style"
2275 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2276 if not belle2_validation_tstyle:
2277 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2278
2279 opt_fit = 112
2280 belle2_validation_tstyle.SetOptFit(opt_fit)
2281
2282 opt_stat = 111111
2283 belle2_validation_tstyle.SetOptStat(opt_stat)
2284 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2285
2286 # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2287
2288 else:
2289 belle2_validation_tstyle.cd()
2290
2291
2292def test():
2293 """Simple test method"""
2294 ValidationPlot.set_tstyle()
2295
2296 # Test a histogram plot with some nan and inf values
2297 normal_distributed_values = np.random.randn(1000)
2298
2299 for i in range(10):
2300 normal_distributed_values[i] = np.nan
2301
2302 for i in range(10, 20):
2303 normal_distributed_values[i] = np.inf
2304
2305 for i in range(20, 30):
2306 normal_distributed_values[i] = -np.inf
2307
2308 validation_histogram = ValidationPlot('test_hist')
2309 validation_histogram.hist(normal_distributed_values)
2310 validation_histogram.title = 'A normal distribution'
2311 validation_histogram.xlabel = 'normal'
2312 validation_histogram.ylabel = 'frequency'
2313 validation_histogram.fit_gaus()
2314
2315 # Test for a cumulated histogram - cumulation from left to right
2316 cumulated_histogram = ValidationPlot('test_cum_hist')
2317 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2318 cumulated_histogram.title = 'A cumulated normal distribution'
2319 cumulated_histogram.xlabel = 'normal'
2320 cumulated_histogram.ylabel = 'cdf'
2321 cumulated_histogram.show()
2322
2323 # Test stacked histogram
2324 # Make a random selection of 40%
2325 stackby = np.random.binomial(1.0, 0.40, 1000)
2326 stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2327 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2328
2329 # Make a scatterplot with two species.
2330 x = np.random.randn(1000)
2331 y = 3 * np.random.randn(1000)
2332 ones = np.ones_like(x)
2333 angle = np.pi / 2
2334
2335 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2336 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2337
2338 stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2339 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2340
2341 # Make a profile plot with the two species
2342 stacked_validation_profile = ValidationPlot('test_stacked_profile')
2343 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2344
2345 # Make a two dimensional histogram with two species
2346 stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2347 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2348
2349 # Test a profile with a diagonal fit
2350 x = np.linspace(-1, 1, 1000)
2351 y = x.copy()
2352 x[0] = np.nan
2353 diagonal_plot = ValidationPlot('test_diag')
2354 diagonal_plot.profile(x, y, bins=50)
2355 diagonal_plot.fit_line()
2356
2357 # Test if cumulation works for profile plots
2358 cumulated_profile = ValidationPlot('test_cum_profile')
2359 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2360
2361 tfile = ROOT.TFile('test.root', 'RECREATE')
2362
2363 validation_histogram.write(tfile)
2364
2365 with root_cd("expert") as tdirectory1:
2366 diagonal_plot.write(tdirectory1)
2367 cumulated_profile.write(tdirectory1)
2368 cumulated_histogram.write(tdirectory1)
2369
2370 with root_cd("stacked") as tdirectory2:
2371 stacked_validation_histogram.write(tdirectory2)
2372 stacked_validation_scatter.write()
2373 stacked_validation_profile.write()
2374 stacked_validation_hist2d.write()
2375
2376 tfile.Close()
2377
2378 tfile = ROOT.TFile('test.root')
2379 tBrowser = ROOT.TBrowser()
2380 tBrowser.BrowseObject(tfile)
2381 input()
2382 tfile.Close()
2383
2384
2385if __name__ == '__main__':
2386 test()
2387
2388# @endcond