Belle II Software development
plot.py
1#!/usr/bin/env python3
2
3
10
11import sys
12import math
13import collections
14import array
15import numpy as np
16import ctypes
17
18import ROOT
19
20import basf2
21
22from tracking.root_utils import root_cd, root_save_name
23
24from tracking.validation import statistics
25
26import logging
27# @cond internal_test
28
29
30def get_logger():
31 """Get for the logging.Logger instance of this module
32
33 Returns
34 -------
35 logging.Logger
36 Logger instance of this module
37 """
38 return logging.getLogger(__name__)
39
40
41
42units_by_quantity_name = {
43 'x': 'cm',
44 'y': 'cm',
45 'z': 'cm',
46 'E': 'GeV',
47 'px': 'GeV',
48 'p_{x}': 'GeV',
49 'py': 'GeV',
50 'p_{y}': 'GeV',
51 'pz': 'GeV',
52 'p_{z}': 'GeV',
53 'pt': 'GeV',
54 'p_{t}': 'GeV',
55 'd0': 'cm',
56 'd_{0}': 'cm',
57 'phi0': None,
58 '#phi_{0}': None,
59 'omega': '1/cm',
60 '#omega': '1/cm',
61 'z0': 'cm',
62 'z_{0}': 'cm',
63 'tan_lambda': None,
64 'tan #lambda': None}
65
66
67def get_unit(quantity_name):
68 """Infers the unit of a quantity from its name.
69
70 Assumes the standard Belle II unit system.
71
72 Currently looks up the quantity string from units_by_quantity_name.
73
74 Parameters
75 ----------
76 quantity_name : str
77 Name of a quantity (E.g. pt, x, ...)
78
79 Returns
80 -------
81 str
82 """
83
84 unit = units_by_quantity_name.get(quantity_name, None)
85 return unit
86
87
88def compose_axis_label(quantity_name, unit=None):
89 """Formats a quantity name and a unit to a label for a plot axes.
90
91 If the unit is not given to is tried to infer it
92 from the quantity name by the get_unit function.
93
94 Parameters
95 ----------
96 quantity_name : str
97 Name of the quantity to be displayed at the axes
98 unit : str, optional
99 The unit of the quantity. Defaults to get_unit(quantity_name)
100
101 Returns
102 -------
103 str
104 """
105
106 if unit is None:
107 unit = get_unit(quantity_name)
108
109 if unit is None:
110 axis_label = quantity_name
111 else:
112 axis_label = f'{quantity_name} ({unit})'
113
114 return axis_label
115
116
117def find_object_recursive(tFileOrTDirectory, name):
118 """
119 Recursively search for an object in a ROOT file by name.
120
121 - First searches the top-level directory.
122 - Then searches subdirectories recursively.
123 - Includes fallback using gDirectory in case ROOT key table is incomplete.
124
125 @param tFileOrTDirectory (ROOT.TDirectory or ROOT.TFile): The file or directory to search in.
126 @param name (str): The name of the object to find.
127
128 @return ROOT object or None if not found.
129 """
130
131 # Try top-level first
132 obj = tFileOrTDirectory.Get(name)
133 if obj:
134 return obj
135 # Fallback: sometimes objects are accessible only via gDirectory (rare ROOT quirk)
136 # ROOT.gDirectory.cd(tFileOrTDirectory.GetPath())
137 # obj = ROOT.gROOT.FindObject(name)
138 # if obj:
139 # return obj
140
141 # If not found, check each key and recurse into subdirectories
142 for key in tFileOrTDirectory.GetListOfKeys():
143 if key.GetClassName() == 'TDirectoryFile':
144 subdir = tFileOrTDirectory.Get(key.GetName())
145 obj = find_object_recursive(subdir, name)
146 if obj:
147 return obj
148
149 return None
150
151
152def get1DBinningFromReference(name, refFileName):
153 """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
154
155 @param name : name of the TH1 object to be looked for in the file
156 @param refFileName : name of the reference file where the object is searched for
157
158 @return int nbin, float xmin, float xmax of the TH1
159 """
160
161 nbins = None
162 x_min = None
163 x_max = None
164
165 if refFileName is None or refFileName == "":
166 return nbins, x_min, x_max
167
168 # store current directory to not confuse directories by opening a TFile
169 oldDirectory = ROOT.gROOT.CurrentDirectory().load()
170
171 tfile = ROOT.TFile(refFileName)
172 if tfile.IsOpen():
173 objptr = find_object_recursive(tfile, name)
174 if objptr and objptr.InheritsFrom("TH1"):
175 nbins = objptr.GetNbinsX()
176 x_min = objptr.GetXaxis().GetXmin()
177 x_max = objptr.GetXaxis().GetXmax()
178 else:
179 basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
180 else:
181 basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
182
183 tfile.Close()
184
185 # set current directory back to original one
186 oldDirectory.cd()
187
188 return nbins, x_min, x_max
189
190
191
192StatsEntry = ROOT.TParameter(float)
193
194
195class ValidationPlot:
196
197 """Class for generating a validation plot for the Belle II validation page.
198
199 Typically it generates plots from values stored in numpy arrays and feeds them into
200 plot ROOT classes for storing them.
201
202 It implements an automatic binning procedure based on the rice rule and
203 robust z score outlier detection.
204
205 It also keeps track of additional statistics typically neglected by ROOT such as a count
206 for the non finit values such as NaN, +Inf, -Inf.
207
208 The special attributes for the Belle II validation page like
209 * title
210 * contract
211 * description
212 * check
213 are exposed as properties of this class.
214 """
215
216
217 very_sparse_dots_line_style_index = 28
218 # This line style is defined in the set_tstyle method below.
219
220 def __init__(self, name, referenceFileName=None):
221 """Constructor of the ValidationPlot
222
223 Parameters
224 ----------
225 name : str
226 A unique name to be used as the name of the ROOT object to be generated
227
228 referenceFileName : str
229 name of a reference file. If set the code will try to get the histogram or profile
230 from that file and determine the number of bins and upper and lower bound
231 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
232 """
233
234
235 self.name = root_save_name(name)
236
237
238 self.referenceFileName = referenceFileName
239
240
241 self._description = ''
242
243
244 self._check = ''
245
246
247 self._contact = ''
248
249
250 self._xlabel = ''
251
252
253 self._ylabel = ''
254
255
256 self._title = ''
257
258
259 self.plot = None
260
261
262 self.histograms = []
263
264
265 self._is_expert = True
266
267
268 self.pvalue_warn = None
269
270
271 self.pvalue_error = None
272
273
274 self.y_log = False
275
276 def hist(self,
277 xs,
278 weights=None,
279 stackby=None,
280 bins=None,
281 lower_bound=None,
282 upper_bound=None,
283 outlier_z_score=None,
284 include_exceptionals=True,
285 allow_discrete=False,
286 cumulation_direction=None,
287 is_expert=True):
288 """Fill the plot with a one dimensional histogram."""
289
290 # if referenceFileName was set the binning will taken from there
291 if self.referenceFileName is not None:
292 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
293 if n is not None and xmin is not None and xmax is not None:
294 bins = n
295 upper_bound = xmax
296 lower_bound = xmin
297
298 th1_factory = ROOT.TH1D
299 self._is_expert = is_expert
300
301 self.create_1d(th1_factory,
302 xs,
303 weights=weights,
304 stackby=stackby,
305 bins=bins,
306 lower_bound=lower_bound,
307 upper_bound=upper_bound,
308 outlier_z_score=outlier_z_score,
309 include_exceptionals=include_exceptionals,
310 allow_discrete=allow_discrete,
311 cumulation_direction=cumulation_direction)
312
313 if not self.ylabel:
314
315 self.ylabel = 'count'
316
317 return self
318
319 def profile(self,
320 xs,
321 ys,
322 weights=None,
323 stackby=None,
324 bins=None,
325 lower_bound=None,
326 upper_bound=None,
327 y_binary=None,
328 y_log=None,
329 outlier_z_score=None,
330 include_exceptionals=True,
331 allow_discrete=False,
332 cumulation_direction=None,
333 gaus_z_score=None,
334 is_expert=True,
335 is_asymmetry=False):
336 """Fill the plot with a one dimensional profile of one variable over another."""
337
338 # if referenceFileName was set the binning will taken from there
339 if self.referenceFileName is not None:
340 n = None
341 xmin = None
342 xmax = None
343 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
344 if n is not None and xmin is not None and xmax is not None:
345 bins = n
346 upper_bound = xmax
347 lower_bound = xmin
348
349 th1_factory = ROOT.TProfile
350 self._is_expert = is_expert
351 if gaus_z_score is None:
352 self.create_1d(th1_factory,
353 xs,
354 ys,
355 weights=weights,
356 stackby=stackby,
357 bins=bins,
358 lower_bound=lower_bound,
359 upper_bound=upper_bound,
360 outlier_z_score=outlier_z_score,
361 include_exceptionals=include_exceptionals,
362 allow_discrete=allow_discrete,
363 cumulation_direction=cumulation_direction)
364 else:
365 # Introduce a dummy name for the temporary two dimensional histogram
366 self.name = "_" + self.name
367 self.hist2d(xs, ys=ys, weights=weights, stackby=stackby,
368 bins=(bins, None),
369 lower_bound=(lower_bound, None),
370 upper_bound=(upper_bound, None),
371 outlier_z_score=(outlier_z_score, outlier_z_score),
372 include_exceptionals=(include_exceptionals, True),
373 allow_discrete=(allow_discrete, False),
374 is_expert=is_expert)
375
376 self.name = self.name[1:]
377 profiles = []
378 for histogram in self.histograms:
379 profile = self.gaus_slice_fit(histogram,
380 name=histogram.GetName()[1:],
381 z_score=gaus_z_score)
382 profiles.append(profile)
383 self.histograms = profiles
384 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False)
385
386 if y_log:
387 self.y_log = True
388
389 if y_binary or self.is_binary(ys):
390 if not self.ylabel:
391 self.ylabel = 'probability'
392
393 min_y = 0
394 if (is_asymmetry):
395 min_y = -1.05
396 for histogram in self.histograms:
397 histogram.SetMinimum(min_y)
398 histogram.SetMaximum(1.05)
399
400 self.plot.SetMinimum(min_y)
401 self.plot.SetMaximum(1.05)
402
403 return self
404
405 def scatter(self,
406 xs,
407 ys,
408 stackby=None,
409 lower_bound=(None, None),
410 upper_bound=(None, None),
411 outlier_z_score=(None, None),
412 include_exceptionals=(True, True),
413 max_n_data=100000,
414 is_expert=True):
415 """Fill the plot with a (unbinned) two dimensional scatter plot"""
416
417 self._is_expert = is_expert
418
419 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
420 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
421 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
422 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
423
424 x_lower_bound, x_upper_bound = self.determine_range(
425 xs,
426 lower_bound=x_lower_bound,
427 upper_bound=x_upper_bound,
428 outlier_z_score=x_outlier_z_score,
429 include_exceptionals=x_include_exceptionals
430 )
431
432 y_lower_bound, y_upper_bound = self.determine_range(
433 ys,
434 lower_bound=y_lower_bound,
435 upper_bound=y_upper_bound,
436 outlier_z_score=y_outlier_z_score,
437 include_exceptionals=y_include_exceptionals
438 )
439
440 graph = ROOT.TGraph()
441
442 graph.SetName(self.name)
443 graph.SetMarkerStyle(6)
444 graph.GetHistogram().SetOption("AP") # <-- this has no effect?
445
446 # Trying to make the graph lines invisible
447 color_index = 0 # white
448 # Transparent white
449 graph.SetLineColorAlpha(color_index, 0)
450 graph.SetLineStyle(self.very_sparse_dots_line_style_index)
451
452 # Transport the lower and upper bound as ranges of the axis
453 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
454 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
455
456 self.create(graph,
457 xs=xs,
458 ys=ys,
459 stackby=stackby,
460 reverse_stack=False)
461
462 return self
463
464 def grapherrors(self,
465 xs_and_err,
466 ys_and_err,
467 stackby=None,
468 lower_bound=(None, None),
469 upper_bound=(None, None),
470 outlier_z_score=(None, None),
471 include_exceptionals=(True, True),
472 max_n_data=100000,
473 is_expert=True):
474 """Fill the plot with a (unbinned) two dimensional scatter plot
475 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
476 as numpy arrays
477 """
478
479 self._is_expert = is_expert
480
481 xs = xs_and_err[0]
482 ys = ys_and_err[0]
483
484 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
485 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
486 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
487 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
488
489 x_lower_bound, x_upper_bound = self.determine_range(
490 xs,
491 lower_bound=x_lower_bound,
492 upper_bound=x_upper_bound,
493 outlier_z_score=x_outlier_z_score,
494 include_exceptionals=x_include_exceptionals
495 )
496
497 y_lower_bound, y_upper_bound = self.determine_range(
498 ys,
499 lower_bound=y_lower_bound,
500 upper_bound=y_upper_bound,
501 outlier_z_score=y_outlier_z_score,
502 include_exceptionals=y_include_exceptionals
503 )
504
505 graph = ROOT.TGraphErrors()
506
507 graph.SetName(self.name)
508 graph.GetHistogram().SetOption("A")
509
510 graph.SetMarkerColor(4)
511 graph.SetMarkerStyle(21)
512
513 # Transport the lower and upper bound as ranges of the axis
514 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
515 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
516
517 self.create(graph,
518 xs=xs,
519 ys=ys,
520 stackby=stackby,
521 reverse_stack=False)
522
523 return self
524
525 def hist2d(self,
526 xs,
527 ys,
528 weights=None,
529 stackby=None,
530 bins=(None, None),
531 lower_bound=(None, None),
532 upper_bound=(None, None),
533 outlier_z_score=(None, None),
534 include_exceptionals=(True, True),
535 allow_discrete=(False, False),
536 quantiles=None,
537 is_expert=True):
538 """Fill the plot with a two dimensional histogram"""
539
540 name = self.name
541 # Introduce a dummy name for the temporary two dimensional histogram
542 if quantiles is not None:
543 name = "_" + self.name
544
545 x_bins, y_bins = self.unpack_2d_param(bins)
546 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
547 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
548 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
549 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
550 x_allow_discrete, y_allow_discrete = self.unpack_2d_param(allow_discrete)
551
552 if quantiles is not None:
553 y_include_exceptionals = True
554 y_allow_discrete = False
555
556 x_bin_edges, x_bin_labels = self.determine_bin_edges(xs,
557 stackbys=stackby,
558 bins=x_bins,
559 lower_bound=x_lower_bound,
560 upper_bound=x_upper_bound,
561 outlier_z_score=x_outlier_z_score,
562 include_exceptionals=x_include_exceptionals,
563 allow_discrete=x_allow_discrete)
564
565 y_bin_edges, y_bin_labels = self.determine_bin_edges(ys,
566 stackbys=stackby,
567 bins=y_bins,
568 lower_bound=y_lower_bound,
569 upper_bound=y_upper_bound,
570 outlier_z_score=y_outlier_z_score,
571 include_exceptionals=y_include_exceptionals,
572 allow_discrete=y_allow_discrete)
573
574 n_x_bins = len(x_bin_edges) - 1
575 n_y_bins = len(y_bin_edges) - 1
576
577 self.lower_bound = (x_bin_edges[0], y_bin_edges[0])
578
579 self.upper_bound = (x_bin_edges[-1], y_bin_edges[-1])
580
581 histogram = ROOT.TH2D(name,
582 '',
583 n_x_bins,
584 x_bin_edges,
585 n_y_bins,
586 y_bin_edges)
587
588 if x_bin_labels:
589 get_logger().info("Scatter plot %s is discrete in x.", name)
590 x_taxis = histogram.GetXaxis()
591 for i_x_bin, x_bin_label in enumerate(x_bin_labels):
592 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
593 self.add_stats_entry(histogram, "dx", 1)
594
595 else:
596 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
597 self.add_stats_entry(histogram, "dx", x_bin_width)
598
599 if y_bin_labels:
600 get_logger().info("Scatter plot %s is discrete in y.", name)
601 y_taxis = histogram.GetYaxis()
602 for i_y_bin, y_bin_label in enumerate(y_bin_labels):
603 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
604 self.add_stats_entry(histogram, "dy", 1)
605
606 else:
607 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
608 self.add_stats_entry(histogram, "dy", y_bin_width)
609
610 self.create(histogram, xs, ys=ys, weights=weights, stackby=stackby)
611
612 if quantiles is not None:
613 self.name = self.name[1:]
614 profiles = []
615 for histogram in self.histograms:
616 for quantile in quantiles:
617 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
618
619 # Manually copy labels grumble grumble
620 x_taxis = histogram.GetXaxis()
621 new_x_taxis = profile.GetXaxis()
622 for i_bin in range(x_taxis.GetNbins() + 2):
623 label = x_taxis.GetBinLabel(i_bin)
624 if label != "":
625 new_x_taxis.SetBinLabel(i_bin, label)
626
627 # Remove faulty error values)
628 epsilon = sys.float_info.epsilon
629 for i_bin in range(0, profile.GetNbinsX() + 2):
630 profile.SetBinError(i_bin, epsilon)
631
632 profiles.append(profile)
633
634 self.histograms = profiles
635 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False, force_graph=True)
636
637 # Adjust the discrete bins after the filling to be equidistant
638 if x_bin_labels:
639 for histogram in self.histograms:
640 x_taxis = histogram.GetXaxis()
641 x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
642 x_taxis.Set(n_x_bins, x_bin_edges)
643
644 # Adjust the discrete bins after the filling to be equidistant
645 if y_bin_labels:
646 for histogram in self.histograms:
647 x_taxis = histogram.GetXaxis()
648 y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
649 y_taxis.Set(n_y_bins, y_bin_edges)
650
651 return self
652
653 def fit_gaus(self, z_score=None):
654 """Fit a Gaus bell curve to the central portion of a one dimensional histogram
655
656 The fit is applied to the central mean +- z_score * std interval of the histogram,
657 such that it is less influence by non gaussian tails further away than the given z score.
658
659 @param float z_score number of sigmas to include from the mean value of the histogram.
660 """
661 title = "gaus"
662 formula = "gaus"
663
664 plot = self.plot
665 if plot is None:
666 raise RuntimeError('Validation plot must be filled before it can be fitted.')
667
668 if not isinstance(plot, ROOT.TH1D):
669 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
670
671 histogram = plot
672
673 fit_tf1 = ROOT.TF1("Fit", formula)
674 fit_tf1.SetTitle(title)
675 fit_tf1.SetParName(0, "n")
676 fit_tf1.SetParName(1, "mean")
677 fit_tf1.SetParName(2, "std")
678
679 n = histogram.GetSumOfWeights()
680 mean = histogram.GetMean()
681 std = histogram.GetStdDev()
682
683 fit_tf1.SetParameter(0, n)
684 fit_tf1.SetParameter(1, mean)
685 fit_tf1.SetParameter(2, std)
686
687 fit_options = "LM"
688 return self.fit(fit_tf1,
689 fit_options,
690 z_score=z_score)
691
692 def fit_line(self):
693 """Fit a general line to a one dimensional histogram"""
694 title = "line"
695 formula = "x++1"
696 fit_tf1 = ROOT.TF1("Fit", formula)
697 fit_tf1.SetTitle(title)
698 fit_tf1.SetParName(0, "slope")
699 fit_tf1.SetParName(1, "intercept")
700 self.fit(fit_tf1, 'M')
701
702 def fit_const(self):
703 """Fit a constant function to a one dimensional histogram"""
704 title = "const"
705 formula = "[0]"
706 fit_tf1 = ROOT.TF1("Fit", formula)
707 fit_tf1.SetTitle(title)
708 fit_tf1.SetParName(0, "intercept")
709 self.fit(fit_tf1, 'M')
710
711 def fit_diag(self):
712 """Fit a diagonal line through the origin to a one dimensional histogram"""
713 title = "diag"
714 formula = "[0]*x"
715 fit_tf1 = ROOT.TF1("Fit", formula)
716 fit_tf1.SetTitle(title)
717 fit_tf1.SetParName(0, "slope")
718 self.fit(fit_tf1, 'M')
719
720 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
721 """Fit a user defined function to a one dimensional histogram
722
723 Parameters
724 ----------
725 formula : str or TF1
726 Formula string or TH1 to be fitted. See TF1 constructors for that is a valid formula
727 options : str
728 Options string to be used in the fit. See TH1::Fit()
729 lower_bound : float
730 Lower bound of the range to be fitted
731 upper_bound : float
732 Upper bound of the range to be fitted
733 """
734 plot = self.plot
735 if plot is None:
736 raise RuntimeError('Validation plot must be filled before it can be fitted.')
737
738 if not isinstance(plot, ROOT.TH1D):
739 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
740
741 histogram = plot
742
743 xaxis = histogram.GetXaxis()
744 n_bins = xaxis.GetNbins()
745 hist_lower_bound = xaxis.GetBinLowEdge(1)
746 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
747
748 if z_score is not None:
749 mean = histogram.GetMean()
750 std = histogram.GetStdDev()
751
752 if lower_bound is None:
753 lower_bound = mean - z_score * std
754
755 if upper_bound is None:
756 upper_bound = mean + z_score * std
757
758 # Setup the plotting range of the function to match the histogram
759 if isinstance(formula, ROOT.TF1):
760 fit_tf1 = formula
761 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
762 else:
763 fit_tf1 = ROOT.TF1("Fit",
764 formula,
765 hist_lower_bound,
766 hist_upper_bound)
767 get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
768
769 # Determine fit range
770 if lower_bound is None or lower_bound < hist_lower_bound:
771 lower_bound = hist_lower_bound
772 if upper_bound is None or upper_bound > hist_upper_bound:
773 upper_bound = hist_upper_bound
774
775 # Make sure the fitted function is not automatically added since we want to do that one our own.
776 # Look for the documentation of TH1::Fit() for details of the options.
777 if 'N' not in options:
778 options += 'N'
779
780 fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
781
782 self.set_fit_tf1(histogram, fit_tf1)
783 return fit_res
784
785 def show(self):
786 """Show the plot."""
787 if self.plot:
788 self.plot.Draw()
789 else:
790 raise ValueError("Can not show a validation plot that has not been filled.")
791
792 def write(self, tdirectory=None):
793 """Write the plot to file
794
795 Parameters
796 ----------
797 tdirectory : ROOT.TDirectory, optional
798 ROOT directory to which the plot should be written.
799 If omitted write to the current directory
800 """
801 if not self.plot:
802 raise ValueError("Can not write a validation plot that has not been filled.")
803
804 with root_cd(tdirectory):
805 ValidationPlot.set_tstyle()
806 if self.plot not in self.histograms:
807 self.plot.Write()
808
809 # always disable ROOT's stat plot because it hides items
810 meta_options = ["nostats"]
811
812 # add expert option, if requested
813 if self.is_expert:
814 meta_options.append("expert")
815 else:
816 meta_options.append("shifter")
817
818 # give a custom pvalue warning / error zone if requested
819 if self.pvalue_error is not None:
820 meta_options.append(f"pvalue-error={self.pvalue_error}")
821 if self.pvalue_warn is not None:
822 meta_options.append(f"pvalue-warn={self.pvalue_warn}")
823
824 # Indicator if the y axes should be displayed as a logarithmic scale
825 if self.y_log:
826 meta_options.append("logy")
827
828 meta_options_str = ",".join(meta_options)
829
830 for histogram in self.histograms:
831 histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
832 histogram.Write()
833
834 @property
835 def is_expert(self):
836 """Getter method if an plot plot is marked as expert plot"""
837 return self._is_expert
838
839 @property
840 def title(self):
841 """Getter for the plot title"""
842 return self._title
843
844 @title.setter
845 def title(self, title):
846 """Setter for the plot title"""
847 self._title = title
848 if self.plot:
849 self.plot.SetTitle(title)
850 for histogram in self.histograms:
851 histogram.SetTitle(title)
852
853 @property
854 def xlabel(self):
855 """Getter for the axis label at the x axis"""
856 return self._xlabel
857
858 @xlabel.setter
859 def xlabel(self, xlabel):
860 """Setter for the axis label at the x axis"""
861 self._xlabel = xlabel
862 for histogram in self.histograms:
863 histogram.GetXaxis().SetTitle(xlabel)
864
865 @property
866 def ylabel(self):
867 """Getter for the axis label at the y axis"""
868 return self._ylabel
869
870 @ylabel.setter
871 def ylabel(self, ylabel):
872 """Setter for the axis label at the y axis"""
873 self._ylabel = ylabel
874 for histogram in self.histograms:
875 histogram.GetYaxis().SetTitle(ylabel)
876
877 @property
878 def contact(self):
879 """Getter for the contact email address to be displayed on the validation page"""
880 return self._contact
881
882 @contact.setter
883 def contact(self, contact):
884 """Setter for the contact email address to be displayed on the validation page"""
885 self._contact = contact
886 for histogram in self.histograms:
887 found_obj = histogram.FindObject('Contact')
888 if not found_obj:
889 tnamed = ROOT.TNamed("Contact", contact)
890 histogram.GetListOfFunctions().Add(tnamed)
891 found_obj = histogram.FindObject('Contact')
892 found_obj.SetTitle(contact)
893
894 @property
895 def description(self):
896 """Getter for the description to be displayed on the validation page"""
897 return self._description
898
899 @description.setter
900 def description(self, description):
901 """Setter for the description to be displayed on the validation page"""
902 self._description = description
903 for histogram in self.histograms:
904 found_obj = histogram.FindObject('Description')
905 if not found_obj:
906 tnamed = ROOT.TNamed("Description", description)
907 histogram.GetListOfFunctions().Add(tnamed)
908 found_obj = histogram.FindObject('Description')
909 found_obj.SetTitle(description)
910
911 @property
912 def check(self):
913 """Getter for the check to be displayed on the validation page"""
914 return self._check
915
916 @check.setter
917 def check(self, check):
918 """Setter for the check to be displayed on the validation page"""
919 self._check = check
920 for histogram in self.histograms:
921 found_obj = histogram.FindObject('Check')
922 if not found_obj:
923 tnamed = ROOT.TNamed("Check", check)
924 histogram.GetListOfFunctions().Add(tnamed)
925 found_obj = histogram.FindObject('Check')
926 found_obj.SetTitle(check)
927
928 # Implementation details #
929 # ###################### #
930
931 @staticmethod
932 def unpack_2d_param(param):
933 """Unpacks a function parameter for the two dimensional plots.
934
935 If it is a pair the first parameter shall apply to the x coordinate
936 the second to the y coordinate. In this case the pair is returned as two values
937
938 If something else is given the it is assumed that this parameter should equally apply
939 to both coordinates. In this case the same values is return twice as a pair.
940
941 Parameters
942 ----------
943 param : pair or single value
944 Function parameter for a two dimensional plot
945
946 Returns
947 -------
948 pair
949 A pair of values being the parameter for the x coordinate and
950 the y coordinate respectively
951 """
952 try:
953 if len(param) == 2:
954 x_param, y_param = param
955 except TypeError:
956 x_param = param
957 y_param = param
958 return x_param, y_param
959
960 @staticmethod
961 def is_binary(xs):
962 """Determine if the data consists of boolean values"""
963 return statistics.is_binary_series(xs)
964
965 @staticmethod
966 def is_discrete(xs, max_n_unique=None):
967 """Determine if the data consists of discrete values"""
968 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
969
970 @staticmethod
971 def get_exceptional_values(xs):
972 """Find exceptionally frequent values
973
974 Parameters
975 ----------
976 xs : np.array (1d)
977 Data series
978
979 Returns
980 -------
981 np.array (1d)
982 A list of the found exceptional values.
983 """
984 return statistics.rice_exceptional_values(xs)
985
986 @staticmethod
987 def get_robust_mean_and_std(xs):
988 """Does an estimation of mean and standard deviation robust against outliers.
989
990 Parameters
991 ----------
992 xs : np.array (1d)
993 Data series
994
995 Returns
996 -------
997 float, float
998 Pair of mean and standard deviation
999 """
1000 x_mean = statistics.truncated_mean(xs)
1001 x_std = statistics.trimmed_std(xs)
1002 return x_mean, x_std
1003
1004 @staticmethod
1005 def format_bin_label(value):
1006 """Formats a value to be placed at a tick on an axis."""
1007 if np.isfinite(value) and value == np.round(value):
1008 return str(int(value))
1009 else:
1010 formated_value = f"{value:.5g}"
1011
1012 # if the label is to long, switch to shorter "e" format
1013 if len(formated_value) > 8:
1014 formated_value = f"{value:.3e}"
1015 return formated_value
1016
1017 def create_1d(self,
1018 th1_factory,
1019 xs,
1020 ys=None,
1021 weights=None,
1022 bins=None,
1023 stackby=None,
1024 lower_bound=None,
1025 upper_bound=None,
1026 outlier_z_score=None,
1027 include_exceptionals=True,
1028 allow_discrete=False,
1029 cumulation_direction=None):
1030 """Combined factory method for creating a one dimensional histogram or a profile plot."""
1031 name = self.name
1032
1033 # Coerce values to a numpy array. Do not copy if already a numpy array.
1034 xs = np.array(xs, copy=False)
1035
1036 if ys is not None:
1037 ys = np.array(ys, copy=False)
1038
1039 if weights is not None:
1040 weights = np.array(weights, copy=False)
1041
1042 bin_edges, bin_labels = self.determine_bin_edges(xs,
1043 stackbys=stackby,
1044 bins=bins,
1045 lower_bound=lower_bound,
1046 upper_bound=upper_bound,
1047 outlier_z_score=outlier_z_score,
1048 include_exceptionals=include_exceptionals,
1049 allow_discrete=allow_discrete)
1050
1051 n_bins = len(bin_edges) - 1
1052 self.lower_bound = bin_edges[0]
1053 self.upper_bound = bin_edges[-1]
1054 histogram = th1_factory(name, '', n_bins, bin_edges)
1055
1056 if bin_labels:
1057 get_logger().info("One dimensional plot %s is discrete in x.", name)
1058 x_taxis = histogram.GetXaxis()
1059 for i_bin, bin_label in enumerate(bin_labels):
1060 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1061 self.add_stats_entry(histogram, "dx", 1)
1062
1063 else:
1064 bin_width = bin_edges[1] - bin_edges[0]
1065 self.add_stats_entry(histogram, "dx", bin_width)
1066
1067 self.create(histogram,
1068 xs,
1069 ys=ys,
1070 weights=weights,
1071 stackby=stackby,
1072 cumulation_direction=cumulation_direction,
1073 reverse_stack=True)
1074 # Reverse the stack to have the signal distribution at the bottom
1075
1076 # Adjust the discrete bins after the filling to be equidistant
1077 if bin_labels:
1078 for histogram in self.histograms:
1079 x_taxis = histogram.GetXaxis()
1080 bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1081 x_taxis.Set(n_bins, bin_edges)
1082
1083 def create(self,
1084 histogram_template,
1085 xs,
1086 ys=None,
1087 weights=None,
1088 stackby=None,
1089 cumulation_direction=None,
1090 reverse_stack=None):
1091 """Create histograms from a template, possibly stacked"""
1092
1093 histograms = []
1094
1095 if stackby is None:
1096 histogram = histogram_template
1097 self.fill_into(histogram, xs, ys, weights=weights)
1098 if cumulation_direction is not None:
1099 histogram = self.cumulate(histogram, cumulation_direction=cumulation_direction)
1100
1101 histograms.append(histogram)
1102 plot = histogram
1103
1104 else:
1105 stackby = np.array(stackby, copy=False)
1106 name = histogram_template.GetName()
1107
1108 histograms = self.fill_into_grouped(histogram_template,
1109 xs,
1110 ys=ys,
1111 weights=weights,
1112 groupbys=stackby,
1113 groupby_label="stack")
1114
1115 if cumulation_direction is not None:
1116 histograms = [self.cumulate(histogram, cumulation_direction=cumulation_direction)
1117 for histogram in histograms]
1118
1119 plot = self.create_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1120
1121 self.histograms = histograms
1122 self.plot = plot
1123 self.attach_attributes()
1124
1125 @classmethod
1126 def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1127 """Create a stack of histograms"""
1128 if len(histograms) == 1:
1129 plot = histograms[0]
1130 else:
1131 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1132 plot = ROOT.TMultiGraph()
1133 else:
1134 plot = ROOT.THStack()
1135
1136 plot.SetName(name)
1137
1138 # Add the histogram in reverse order such
1139 # that the signal usually is on the bottom an well visible
1140 if reverse_stack:
1141 for histogram in reversed(histograms):
1142 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1143 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1144 plot.Add(histogram, "APZ")
1145 else:
1146 plot.Add(histogram)
1147 else:
1148 for histogram in histograms:
1149 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1150 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1151 plot.Add(histogram, "APZ")
1152 else:
1153 plot.Add(histogram)
1154
1155 return plot
1156
1157 @classmethod
1158 def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1159 """Extract errors from a TProfile histogram and create a TGraph from these"""
1160 if isinstance(tprofile, ROOT.TGraph):
1161 return tprofile
1162
1163 x_taxis = tprofile.GetXaxis()
1164 n_bins = x_taxis.GetNbins()
1165
1166 # bin_ids_with_underflow = list(range(n_bins + 1))
1167 bin_ids_without_underflow = list(range(1, n_bins + 1))
1168
1169 bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1170 if abs_x:
1171 bin_centers = np.abs(bin_centers)
1172 bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1173 bin_x_errors = bin_widths / 2.0
1174
1175 # Now for the histogram content not including the underflow
1176 bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1177 bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1178
1179 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1180
1181 tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1182
1183 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1184 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1185
1186 # Copy all functions and stats entries
1187 for tobject in tprofile.GetListOfFunctions():
1188 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1189
1190 # Add stats entries that are displayed for profile plots
1191 cls.add_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1192
1193 stats_values = np.array([np.nan] * 6)
1194 tprofile.GetStats(stats_values)
1195
1196 sum_w = stats_values[0]
1197 # sum_w2 = stats_values[1]
1198 sum_wx = stats_values[2]
1199 sum_wx2 = stats_values[3]
1200 sum_wy = stats_values[4]
1201 sum_wy2 = stats_values[5]
1202
1203 cls.add_stats_entry(tgrapherrors,
1204 "x avg",
1205 sum_wx / sum_w)
1206
1207 cls.add_stats_entry(tgrapherrors,
1208 "x std",
1209 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1210
1211 cls.add_stats_entry(tgrapherrors,
1212 "y avg",
1213 sum_wy / sum_w)
1214
1215 cls.add_stats_entry(tgrapherrors,
1216 "y std",
1217 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1218
1219 cls.add_stats_entry(tgrapherrors,
1220 'cov',
1221 tgrapherrors.GetCovariance())
1222
1223 cls.add_stats_entry(tgrapherrors,
1224 'corr',
1225 tgrapherrors.GetCorrelationFactor())
1226
1227 return tgrapherrors
1228
1229 def fill_into_grouped(self,
1230 histogram_template,
1231 xs,
1232 ys=None,
1233 weights=None,
1234 groupbys=None,
1235 groupby_label="group"):
1236 """Fill data into similar histograms in groups indicated by a groupby array"""
1237
1238 histograms = []
1239 unique_groupbys = np.unique(groupbys)
1240 name = histogram_template.GetName()
1241
1242 for i_value, value in enumerate(unique_groupbys):
1243 if np.isnan(value):
1244 indices_for_value = np.isnan(groupbys)
1245 else:
1246 indices_for_value = groupbys == value
1247
1248 # Make a copy of the empty histogram
1249 histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1250 i_root_color = i_value + 1
1251
1252 self.set_color(histogram_for_value, i_root_color)
1253
1254 if groupby_label:
1255 self.add_stats_entry(histogram_for_value, groupby_label, value)
1256
1257 self.fill_into(histogram_for_value,
1258 xs,
1259 ys=ys,
1260 weights=weights,
1261 filter=indices_for_value)
1262
1263 histograms.append(histogram_for_value)
1264
1265 return histograms
1266
1267 def set_color(self, tobject, root_i_color):
1268 """Set the color of the ROOT object.
1269
1270 By default the line color of a TGraph should be invisible, so do not change it
1271 For other objects set the marker and the line color
1272
1273 Parameters
1274 ----------
1275 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1276 Object of which the color should be set.
1277 root_i_color : int
1278 Color index of the ROOT color table
1279 """
1280 if isinstance(tobject, ROOT.TGraph):
1281 tobject.SetMarkerColor(root_i_color)
1282 else:
1283 tobject.SetLineColor(root_i_color)
1284 tobject.SetMarkerColor(root_i_color)
1285
1286 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1287 """Fill the data into the plot object"""
1288 if isinstance(plot, ROOT.TGraph):
1289 if ys is None:
1290 raise ValueError("ys are required for filling a graph")
1291 self.fill_into_tgraph(plot, xs, ys, filter=filter)
1292 elif isinstance(plot, ROOT.TGraphErrors):
1293 if ys is None:
1294 raise ValueError("ys are required for filling a graph error")
1295
1296 self.fill_into_tgrapherror(plot, xs, ys)
1297 else:
1298 self.fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1299
1300 def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1301 """fill point values and error of the x and y axis into the graph"""
1302
1303 assert (len(xs[0]) == len(ys[0]))
1304 # set the overall amount of points
1305 graph.Set(len(xs[0]))
1306
1307 for i in range(len(xs[0])):
1308 graph.SetPoint(i, xs[0][i], ys[0][i])
1309 graph.SetPointError(i, xs[1][i], ys[1][i])
1310
1311 def fill_into_tgraph(self, graph, xs, ys, filter=None):
1312 """Fill the data into a TGraph"""
1313
1314 # Save some ifs by introducing a dummy slicing as a non filter
1315 if filter is None:
1316 filter = slice(None)
1317
1318 xs = xs[filter]
1319 ys = ys[filter]
1320
1321 max_n_data = 100000
1322 x_n_data = len(xs)
1323 y_n_data = len(ys)
1324
1325 if max_n_data:
1326 if x_n_data > max_n_data or y_n_data > max_n_data:
1327 get_logger().warning(f"Number of points in scatter graph {self.name} exceed limit {max_n_data}")
1328
1329 get_logger().warning(f"Cropping {max_n_data}")
1330
1331 xs = xs[0:max_n_data]
1332 ys = ys[0:max_n_data]
1333
1334 x_axis = graph.GetXaxis()
1335 y_axis = graph.GetYaxis()
1336
1337 x_lower_bound = x_axis.GetXmin()
1338 x_upper_bound = x_axis.GetXmax()
1339
1340 y_lower_bound = y_axis.GetXmin()
1341 y_upper_bound = y_axis.GetXmax()
1342
1343 x_underflow_indices = xs < x_lower_bound
1344 x_overflow_indices = xs > x_upper_bound
1345
1346 y_underflow_indices = ys < y_lower_bound
1347 y_overflow_indices = ys > y_upper_bound
1348
1349 plot_indices = ~(np.isnan(xs) |
1350 x_underflow_indices |
1351 x_overflow_indices |
1352 np.isnan(ys) |
1353 y_underflow_indices |
1354 y_overflow_indices)
1355
1356 n_plot_data = np.sum(plot_indices)
1357 plot_xs = xs[plot_indices]
1358 plot_ys = ys[plot_indices]
1359
1360 graph.Set(int(n_plot_data))
1361 for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1362 graph.SetPoint(i, x, y)
1363
1364 self.add_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1365
1366 self.add_nan_inf_stats(graph, 'x', xs)
1367 self.add_nan_inf_stats(graph, 'y', ys)
1368
1369 x_n_underflow = np.sum(x_underflow_indices)
1370 if x_n_underflow:
1371 self.add_stats_entry(graph, 'x underf.', x_n_underflow)
1372
1373 x_n_overflow = np.sum(x_overflow_indices)
1374 if x_n_overflow:
1375 self.add_stats_entry(graph, 'x overf.', x_n_overflow)
1376
1377 y_n_underflow = np.sum(y_underflow_indices)
1378 if y_n_underflow:
1379 self.add_stats_entry(graph, 'y underf.', y_n_underflow)
1380
1381 y_n_overflow = np.sum(y_overflow_indices)
1382 if y_n_overflow:
1383 self.add_stats_entry(graph, 'y overf.', y_n_overflow)
1384
1385 self.add_stats_entry(graph, 'x avg', graph.GetMean(1))
1386 self.add_stats_entry(graph, 'x std', graph.GetRMS(1))
1387
1388 self.add_stats_entry(graph, 'y avg', graph.GetMean(2))
1389 self.add_stats_entry(graph, 'y std', graph.GetRMS(2))
1390
1391 self.add_stats_entry(graph, 'cov', graph.GetCovariance())
1392 self.add_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1393
1394 def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1395 """Fill the histogram blocking non finite values
1396
1397 Parameters
1398 ----------
1399 histogram : ROOT.TH1
1400 The histogram to be filled
1401 xs : numpy.ndarray (1d)
1402 Data for the first axes
1403 ys : numpy.ndarray (1d), optional
1404 Data for the second axes
1405 weights : numpy.ndarray (1d), optional
1406 Weight of the individual points. Defaults to one for each
1407 filter : numpy.ndarray, optional
1408 Boolean index array indicating which entries shall be taken.
1409 """
1410 # Save some ifs by introducing a dummy slicing as a non filter
1411 if filter is None:
1412 filter = slice(None)
1413
1414 xs = xs[filter]
1415 # Count the nan and inf values in x
1416 self.add_nan_inf_stats(histogram, 'x', xs)
1417 finite_filter = np.isfinite(xs)
1418
1419 if ys is not None:
1420 ys = ys[filter]
1421 # Count the nan and inf values in y
1422 self.add_nan_inf_stats(histogram, 'y', ys)
1423 finite_filter &= np.isfinite(ys)
1424
1425 if weights is None:
1426 xs = xs[finite_filter]
1427 weights = np.ones_like(xs)
1428 else:
1429 weights = weights[filter]
1430 self.add_nan_inf_stats(histogram, 'w', weights)
1431 finite_filter &= np.isfinite(weights)
1432 xs = xs[finite_filter]
1433 weights[finite_filter]
1434
1435 if ys is not None:
1436 ys = ys[finite_filter]
1437
1438 # Now fill the actual histogram
1439 try:
1440 histogram.FillN
1441 except AttributeError:
1442 Fill = histogram.Fill
1443 if ys is None:
1444 fill = np.frompyfunc(Fill, 2, 1)
1445 fill(xs.astype(np.float64, copy=False),
1446 weights.astype(np.float64, copy=False))
1447 else:
1448 fill = np.frompyfunc(Fill, 3, 1)
1449 fill(xs.astype(np.float64, copy=False),
1450 ys.astype(np.float64, copy=False),
1451 weights.astype(np.float64, copy=False))
1452 else:
1453 if ys is None:
1454 # Make the array types compatible with the ROOT interface if necessary
1455 xs = xs.astype(np.float64, copy=False)
1456 weights = weights.astype(np.float64, copy=False)
1457 n = len(xs)
1458 if n != 0:
1459 histogram.FillN(n, xs, weights)
1460 else:
1461 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1462
1463 else:
1464 # Make the array types compatible with the ROOT interface if necessary
1465 xs = xs.astype(np.float64, copy=False)
1466 ys = ys.astype(np.float64, copy=False)
1467 weights = weights.astype(np.float64, copy=False)
1468 n = len(xs)
1469 if n != 0:
1470 histogram.FillN(n, xs, ys, weights)
1471 else:
1472 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1473
1474 self.set_additional_stats_tf1(histogram)
1475
1476 @classmethod
1477 def add_nan_inf_stats(cls, histogram, name, xs):
1478 """ Extracts the counts of non finite floats from a series
1479 and adds them as additional statistics to the histogram.
1480
1481 Parameters
1482 ----------
1483 histogram : derived from ROOT.TH1 or ROOT.TGraph
1484 Something having a GetListOfFunctions method that
1485 name : str
1486 A label for the data series to be prefixed to the entries.
1487 xs : numpy.ndarray (1d)
1488 Data from which the non finit floats should be counted.
1489 """
1490 n_nans = np.isnan(xs).sum()
1491 if n_nans > 0:
1492 cls.add_stats_entry(histogram, name + ' nan', n_nans)
1493
1494 n_positive_inf = np.sum(xs == np.inf)
1495 if n_positive_inf > 0:
1496 cls.add_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1497
1498 n_negative_inf = np.sum(xs == -np.inf)
1499 if n_negative_inf > 0:
1500 cls.add_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1501
1502 @classmethod
1503 def add_stats_entry(cls, histogram, label, value):
1504 """Add a new additional statistics to the histogram.
1505
1506 Parameters
1507 ----------
1508 histogram : derived from ROOT.TH1 or ROOT.TGraph
1509 Something having a GetListOfFunctions method that holds the additional statistics
1510 label : str
1511 Label of the statistic
1512 value : float
1513 Value of the statistic
1514 """
1515 stats_entry = StatsEntry(str(label), float(value))
1516 histogram.GetListOfFunctions().Add(stats_entry)
1517 cls.set_additional_stats_tf1(histogram)
1518
1519 @classmethod
1520 def get_additional_stats(cls, histogram):
1521 """Get the additional statistics from the histogram and return them a dict.
1522
1523 Parameters
1524 ----------
1525 histogram : derived from ROOT.TH1 or ROOT.TGraph
1526 Something having a GetListOfFunctions method that holds the additional statistics
1527
1528 Returns
1529 -------
1530 collection.OrderedDict
1531 A map of labels to values for the additional statistics
1532 """
1533 additional_stats = collections.OrderedDict()
1534 for tobject in histogram.GetListOfFunctions():
1535 if isinstance(tobject, StatsEntry):
1536 stats_entry = tobject
1537 label = stats_entry.GetName()
1538 value = stats_entry.GetVal()
1539 additional_stats[label] = value
1540 return additional_stats
1541
1542 @classmethod
1543 def gaus_slice_fit(cls, th2, name, z_score=None):
1544 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1545 # profile = th2.ProfileX(name)
1546
1547 y_taxis = th2.GetYaxis()
1548 th2_lower_bound = y_taxis.GetXmin()
1549 th2_upper_bound = y_taxis.GetXmax()
1550 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1551 n_y_bins = y_taxis.GetNbins()
1552 if z_score:
1553 y_mean = th2.GetMean(2)
1554 y_std = th2.GetStdDev(2)
1555 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1556 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1557 fit_height = fit_upper_bound - fit_lower_bound
1558 # Require all covered bins to be filled
1559 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1560 else:
1561 fit_lower_bound = th2_lower_bound
1562 fit_upper_bound = th2_upper_bound
1563 fit_height = fit_upper_bound - fit_lower_bound
1564 required_n_bins_inslice_filled = n_y_bins / 1.61
1565
1566 # Highest required number of bins should be a third
1567 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1568
1569 fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1570 fit_tf1.SetParName(0, "n")
1571 fit_tf1.SetParName(1, "mean")
1572 fit_tf1.SetParName(2, "std")
1573 i_first_bin = 0
1574 i_last_bin = -1
1575 fit_options = "QNR"
1576 param_fit_th1s = ROOT.TObjArray()
1577 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1578 int(required_n_bins_inslice_filled),
1579 fit_options, param_fit_th1s)
1580
1581 th1_means = param_fit_th1s.At(1)
1582 th1_means.SetName(name)
1583 th1_means.SetTitle(th2.GetTitle())
1584 for label, value in cls.get_additional_stats(th2).items():
1585 cls.add_stats_entry(th1_means, label, value)
1586
1587 # Manually copy labels grumble grumble
1588 x_taxis = th2.GetXaxis()
1589 new_x_taxis = th1_means.GetXaxis()
1590 for i_bin in range(x_taxis.GetNbins() + 2):
1591 label = x_taxis.GetBinLabel(i_bin)
1592 if label != "":
1593 new_x_taxis.SetBinLabel(i_bin, label)
1594
1595 # Adjust plot bound to reflect the fit range.
1596 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1597 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1598 data_height = data_upper_bound - data_lower_bound
1599
1600 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1601 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1602
1603 th1_means.SetMinimum(plot_lower_bound)
1604 th1_means.SetMaximum(plot_upper_bound)
1605
1606 return th1_means
1607
1608 @classmethod
1609 def cumulate(cls, histogram, cumulation_direction=None):
1610 """Cumulates the histogram inplace.
1611
1612 Parameters
1613 ----------
1614 histogram : ROOT.TH1 or ROOT.TProfile
1615 Filled histogram to be cumulated
1616 cumulation_direction : int, optional
1617 Direction is indicated by the sign.
1618 Positive means from left to right, negative means from right to left.
1619 If now cumulation direction is given return the histogram as is.
1620
1621 Returns
1622 -------
1623 ROOT.TH1
1624 Cumulated histogram potentially altered inplace.
1625 """
1626 if not cumulation_direction:
1627 return histogram
1628
1629 cumulate_backward = cumulation_direction < 0
1630 cumulate_forward = not cumulate_backward
1631
1632 if isinstance(histogram, ROOT.TH2):
1633 raise ValueError("Cannot cumulate a two dimensional histogram.")
1634
1635 if isinstance(histogram, ROOT.TH3):
1636 raise ValueError("Cannot cumulate a three dimensional histogram.")
1637
1638 if not isinstance(histogram, ROOT.TH1):
1639 raise ValueError("Can only cumulate a one dimensional histogram.")
1640
1641 if isinstance(histogram, ROOT.TProfile):
1642 tprofile = histogram
1643 # Turn the profile histogram into graph where we can set the new content and errors
1644 tgraph = cls.convert_tprofile_to_tgrapherrors(histogram)
1645 tgraph.SetName(tprofile.GetName())
1646
1647 n_bins = histogram.GetNbinsX()
1648 n_points = n_bins
1649 cumulated_content = 0.0
1650 cumulated_entries = 0
1651 cumulated_std = 0.0
1652
1653 # Always include the overflow bins.
1654 i_bins = list(range(0, n_bins + 2))
1655 if not cumulate_forward:
1656 i_bins = reversed(i_bins)
1657
1658 for i_bin in i_bins:
1659 i_point = i_bin - 1
1660 bin_content = tprofile.GetBinContent(i_bin)
1661 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1662 bin_std = tprofile.GetBinError(i_bin)
1663
1664 if bin_entries != 0:
1665 cumulated_content = (
1666 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1667 (cumulated_entries + bin_entries)
1668 )
1669
1670 cumulated_std = (
1671 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1672 (cumulated_entries + bin_entries)
1673 )
1674
1675 cumulated_entries = cumulated_entries + bin_entries
1676 else:
1677 # empty bin does not add anything to the cumulation
1678 pass
1679
1680 if i_point >= 0 and i_point < n_points:
1681 x = tgraph.GetX()[i_point]
1682 # x = ROOT.Double()
1683 # y = ROOT.Double()
1684 # tgraph.GetPoint(i_point, x, y)
1685 tgraph.SetPoint(i_point, x, cumulated_content)
1686
1687 x_error = tgraph.GetErrorX(i_point)
1688 tgraph.SetPointError(i_point, x_error, cumulated_std)
1689 return tgraph
1690
1691 else:
1692 # Always include the overflow bins.
1693 n_bins = histogram.GetNbinsX()
1694 cumulated_content = 0.0
1695
1696 i_bins = list(range(0, n_bins + 2))
1697 if not cumulate_forward:
1698 i_bins = reversed(i_bins)
1699
1700 for i_bin in i_bins:
1701 bin_content = histogram.GetBinContent(i_bin)
1702 cumulated_content += bin_content
1703 histogram.SetBinContent(i_bin, cumulated_content)
1704
1705 return histogram
1706
1707 def determine_bin_edges(self,
1708 xs,
1709 stackbys=None,
1710 bins=None,
1711 lower_bound=None,
1712 upper_bound=None,
1713 outlier_z_score=None,
1714 include_exceptionals=True,
1715 allow_discrete=False):
1716 """Deducing bin edges from a data series.
1717
1718 Parameters
1719 ----------
1720 xs : numpy.ndarray (1d)
1721 Data point for which a binning should be found.
1722 stackbys : numpy.ndarray (1d)
1723 Categories of the data points to be distinguishable
1724 bins : list(float) or int or None, optional
1725 Preset bin edges or preset number of desired bins.
1726 The default, None, means the bound should be extracted from data.
1727 The rice rule is used the determine the number of bins.
1728 If a list of floats is given return them immediately.
1729 lower_bound : float or None, optional
1730 Preset lower bound of the binning range.
1731 The default, None, means the bound should be extracted from data.
1732 upper_bound : float or None, optional
1733 Preset upper bound of the binning range.
1734 The default, None, means the bound should be extracted from data.
1735 outlier_z_score : float or None, optional
1736 Threshold z-score of outlier detection.
1737 The default, None, means no outlier detection.
1738 include_exceptionals : bool, optional
1739 If the outlier detection is active this switch indicates,
1740 if values detected as exceptionally frequent shall be included
1741 nevertheless into the binning range. Default is True,
1742 which means exceptionally frequent values as included
1743 even if they are detected as outliers.
1744
1745 Returns
1746 -------
1747 np.array (1d), list(str)
1748 Pair of bin edges and labels deduced from the series.
1749 Second element is None if the series is not detected as discrete.
1750 """
1751 debug = get_logger().debug
1752 debug('Determine binning for plot named %s', self.name)
1753
1754 if bins == 'flat':
1755 # Special value for the flat distribution binning
1756 n_bins = None
1757
1758 elif isinstance(bins, collections.abc.Iterable):
1759 # Bins is considered as an array
1760 # Construct a float array forwardable to root.
1761 bin_edges = bins
1762 bin_edges = array.array('d', bin_edges)
1763 bin_labels = None
1764 return bin_edges, bin_labels
1765
1766 # If bins is not an iterable assume it is the number of bins or None
1767 elif bins is None:
1768 n_bins = None
1769 else:
1770 # Check that bins can be coerced to an integer.
1771 n_bins = int(bins)
1772
1773 # Do not allow negative bin numbers
1774 if not n_bins > 0:
1775 message = f'Cannot accept n_bins={bins} as number of bins, because it is not a number greater than 0.'
1776 raise ValueError(message)
1777
1778 # Coerce values to a numpy array. Do not copy if already a numpy array.
1779 xs = np.array(xs, copy=False)
1780
1781 if self.is_binary(xs) or (allow_discrete and self.is_discrete(xs)):
1782 # This covers also the case
1783 debug('Discrete binning values encountered')
1784 finite_xs = xs[np.isfinite(xs)]
1785 unique_xs = np.unique(finite_xs)
1786
1787 # Crop the unique values between the lower and upper bound
1788 if lower_bound is None:
1789 if len(unique_xs) == 0:
1790 if upper_bound is None:
1791 lower_bound = 0
1792 else:
1793 lower_bound = upper_bound - 1
1794 else:
1795 lower_bound = np.min(unique_xs)
1796 else:
1797 unique_xs = unique_xs[unique_xs >= lower_bound]
1798
1799 if upper_bound is None:
1800 if len(unique_xs) == 0:
1801 upper_bound = lower_bound + 1
1802 else:
1803 upper_bound = np.min(unique_xs)
1804 else:
1805 unique_xs = unique_xs[unique_xs <= upper_bound]
1806
1807 if n_bins is None:
1808 n_bins = len(unique_xs) or 1
1809
1810 if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1811 # Construct a float array forwardable to root.
1812 bin_edges = array.array('d', unique_xs)
1813 format_bin_label = self.format_bin_label
1814 bin_labels = [format_bin_label(value) for value in bin_edges]
1815 bin_edges.append(bin_edges[-1] + 1)
1816 return bin_edges, bin_labels
1817
1818 else:
1819 # Ambiguous case what to do in case of a number of requested bins
1820 # that is lower than the number of unique values?
1821
1822 # Continue with an equistant binning for now.
1823 pass
1824
1825 debug('Lower bound %s', lower_bound)
1826 debug('Upper bound %s', upper_bound)
1827 debug('N bins %s', n_bins)
1828
1829 else:
1830 bin_range = self.determine_bin_range(xs,
1831 stackbys=stackbys,
1832 n_bins=n_bins,
1833 lower_bound=lower_bound,
1834 upper_bound=upper_bound,
1835 outlier_z_score=outlier_z_score,
1836 include_exceptionals=include_exceptionals)
1837
1838 n_bins, lower_bound, upper_bound = bin_range
1839
1840 n_bin_edges = n_bins + 1
1841 if lower_bound != upper_bound:
1842 if bins == "flat":
1843 debug("Creating flat distribution binning")
1844 percentiles = np.linspace(0.0, 100.0, n_bin_edges)
1845 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], percentiles))
1846 else:
1847 # Correct the upper bound such that all values are strictly smaller than the upper bound
1848 # Make one step in single precision in the positive direction
1849 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1850
1851 # Reinforce the upper and lower bound to be exact
1852 # Also expand the upper bound by an epsilon
1853 # to prevent the highest value in xs from going in the overflow bin
1854 bin_edges[0] = lower_bound
1855 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1856 debug('Bins %s', bin_edges)
1857
1858 else:
1859 # Fall back if the array contains only one value
1860 bin_edges = [lower_bound, upper_bound + 1]
1861
1862 # Construct a float array forwardable to root.
1863 bin_edges = array.array('d', bin_edges)
1864 debug('Bins %s for %s', bin_edges, self.name)
1865 return bin_edges, None
1866
1867 def determine_bin_range(self,
1868 xs,
1869 stackbys=None,
1870 n_bins=None,
1871 lower_bound=None,
1872 upper_bound=None,
1873 outlier_z_score=None,
1874 include_exceptionals=True):
1875 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1876 estimating the values that are not given.
1877
1878 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1879 The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1880 mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1881 quantile range (IQR).
1882
1883 If additional include_exceptionals is true the method tries to find exceptional values in the series
1884 and always include them in the range if it finds any.
1885 Exceptional values means exact values that appear often in the series for whatever reason.
1886 Possible reasons include
1887 * Integral / default values
1888 * Failed evaluation conditions
1889 * etc.
1890 which should be not cropped away automatically if you are locking on the quality of your data.
1891
1892 Parameters
1893 ----------
1894 xs : numpy.ndarray (1d)
1895 Data point for which a binning should be found.
1896 stackbys : numpy.ndarray (1d)
1897 Categories of the data points to be distinguishable
1898 n_bins : int or None, optional
1899 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1900 The rice rule is used the determine the number of bins.
1901 lower_bound : float or None, optional
1902 Preset lower bound of the binning range.
1903 The default, None, means the bound should be extracted from data.
1904 upper_bound : float or None, optional
1905 Preset upper bound of the binning range.
1906 The default, None, means the bound should be extracted from data.
1907 outlier_z_score : float or None, optional
1908 Threshold z-score of outlier detection.
1909 The default, None, means no outlier detection.
1910 include_exceptionals : bool, optional
1911 If the outlier detection is active this switch indicates,
1912 if values detected as exceptionally frequent shall be included
1913 nevertheless into the binning range. Default is True,
1914 which means exceptionally frequent values as included
1915 even if they are detected as outliers.
1916
1917 Returns
1918 -------
1919 n_bins, lower_bound, upper_bound : int, float, float
1920 A triple of found number of bins, lower bound and upper bound of the binning range.
1921 """
1922
1923 if stackbys is not None:
1924 unique_stackbys = np.unique(stackbys)
1925 stack_ranges = []
1926 for value in unique_stackbys:
1927 if np.isnan(value):
1928 indices_for_value = np.isnan(stackbys)
1929 else:
1930 indices_for_value = stackbys == value
1931
1932 stack_lower_bound, stack_upper_bound = \
1933 self.determine_range(xs[indices_for_value],
1934 lower_bound=lower_bound,
1935 upper_bound=upper_bound,
1936 outlier_z_score=outlier_z_score,
1937 include_exceptionals=include_exceptionals)
1938
1939 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1940
1941 lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1942 upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1943
1944 else:
1945 lower_bound, upper_bound = self.determine_range(xs,
1946 lower_bound=lower_bound,
1947 upper_bound=upper_bound,
1948 outlier_z_score=outlier_z_score,
1949 include_exceptionals=include_exceptionals)
1950
1951 if n_bins is None:
1952 # Assume number of bins according to the rice rule.
1953 # The number of data points should not include outliers.
1954 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1955 rice_n_bins = int(statistics.rice_n_bin(n_data))
1956 n_bins = rice_n_bins
1957
1958 else:
1959 n_bins = int(n_bins)
1960 # Do not allow negative bin numbers
1961 if not n_bins > 0:
1962 message = f'Cannot accept n_bins={n_bins} as number of bins, because it is not a number greater than 0.'
1963 raise ValueError(message)
1964
1965 return n_bins, lower_bound, upper_bound
1966
1967 def determine_range(self,
1968 xs,
1969 lower_bound=None,
1970 upper_bound=None,
1971 outlier_z_score=None,
1972 include_exceptionals=True):
1973 """
1974 Parameters
1975 ----------
1976 xs : numpy.ndarray (1d)
1977 Data point for which a binning should be found.
1978 lower_bound : float or None, optional
1979 Preset lower bound of the binning range.
1980 The default, None, means the bound should be extracted from data.
1981 upper_bound : float or None, optional
1982 Preset upper bound of the binning range.
1983 The default, None, means the bound should be extracted from data.
1984 outlier_z_score : float or None, optional
1985 Threshold z-score of outlier detection.
1986 The default, None, means no outlier detection.
1987 include_exceptionals : bool, optional
1988 If the outlier detection is active this switch indicates,
1989 if values detected as exceptionally frequent shall be included
1990 nevertheless into the binning range. Default is True,
1991 which means exceptionally frequent values as included
1992 even if they are detected as outliers.
1993
1994 Returns
1995 -------
1996 lower_bound, upper_bound : float, float
1997 A pair of found lower bound and upper bound of series.
1998 """
1999 debug = get_logger().debug
2000
2001 finite_xs_indices = np.isfinite(xs)
2002 if np.any(finite_xs_indices):
2003 finite_xs = xs[finite_xs_indices]
2004 else:
2005 finite_xs = xs
2006
2007 make_symmetric = False
2008 exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
2009
2010 # Look for exceptionally frequent values in the series, e.g. integral delta values like -999
2011 if include_exceptionals or exclude_outliers:
2012 exceptional_xs = self.get_exceptional_values(finite_xs)
2013 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
2014
2015 # Prepare for the estimation of outliers
2016 if exclude_outliers:
2017 if not np.all(exceptional_indices):
2018 # Exclude exceptional values from the estimation to be unbiased
2019 # even in case exceptional values fall into the central region near the mean
2020 x_mean, x_std = self.get_robust_mean_and_std(finite_xs[~exceptional_indices])
2021 else:
2022 x_mean, x_std = np.nan, np.nan
2023
2024 make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
2025
2026 if include_exceptionals and len(exceptional_xs) != 0:
2027 lower_exceptional_x = np.min(exceptional_xs)
2028 upper_exceptional_x = np.max(exceptional_xs)
2029 make_symmetric = False
2030 else:
2031 lower_exceptional_x = np.nan
2032 upper_exceptional_x = np.nan
2033
2034 # Find the lower bound, if it is not given.
2035 if lower_bound is None:
2036 try:
2037 lower_bound = np.min(finite_xs)
2038 except ValueError:
2039 lower_bound = -999
2040 # Clip the lower bound by outliers that exceed the given z score
2041 if outlier_z_score is not None:
2042 # The lower bound at which outliers exceed the given z score
2043 lower_outlier_bound = x_mean - outlier_z_score * x_std
2044
2045 # Clip the lower bound such that it concides with an actual value,
2046 # which prevents empty bins from being produced
2047 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2048
2049 if np.any(indices_above_lower_outlier_bound):
2050 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2051
2052 # However we want to include at least the exceptional values in the range if there are any.
2053 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2054
2055 debug('Lower bound after outlier detection')
2056 debug('Lower bound %s', lower_bound)
2057 debug('Lower outlier bound %s', lower_outlier_bound)
2058
2059 # Find the upper bound, if it is not given
2060 if upper_bound is None:
2061 try:
2062 upper_bound = np.max(finite_xs)
2063 except ValueError:
2064 upper_bound = 999
2065 if outlier_z_score is not None:
2066 # The upper bound at which outliers exceed the given z score
2067 upper_outlier_bound = x_mean + outlier_z_score * x_std
2068
2069 # Clip the upper bound such that it concides with an actual value,
2070 # which prevents empty bins from being produced
2071 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2072
2073 if np.any(indices_below_upper_outlier_bound):
2074 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2075
2076 # However we want to include at least the exceptional values in the range if there are any.
2077 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2078
2079 debug('Upper bound after outlier detection')
2080 debug('Upper bound %s', upper_bound)
2081 debug('Upper outlier bound %s', upper_outlier_bound)
2082
2083 if make_symmetric and lower_bound < 0 and upper_bound > 0:
2084 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2085 abs_bound = max(abs(lower_bound), abs(upper_bound))
2086 lower_bound = -abs_bound
2087 upper_bound = abs_bound
2088
2089 return lower_bound, upper_bound
2090
2091 @classmethod
2092 def set_additional_stats_tf1(cls, histogram):
2093 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2094
2095 Parameters
2096 ----------
2097 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2098 Something having a GetListOfFunctions method that should hold
2099 the combined fit and additional statistics function.
2100 """
2101 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2102 cls.set_tf1(histogram, additional_stats_tf1)
2103
2104 @classmethod
2105 def set_fit_tf1(cls, histogram, fit_tf1):
2106 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2107
2108 Parameters
2109 ----------
2110 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2111 Something having a GetListOfFunctions method that should hold
2112 the combined fit and additional statistics function.
2113 """
2114 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2115 combined_tf1 = cls.combine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2116 cls.set_tf1(histogram, combined_tf1)
2117
2118 @classmethod
2119 def set_tf1(cls, histogram, tf1):
2120 """Set the attached TF1 of the histogram.
2121
2122 Parameters
2123 ----------
2124 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2125 Something having a GetListOfFunctions method that should hold
2126 the combined fit and additional statistics function.
2127 """
2128 # Delete any functions formally added
2129 cls.delete_tf1(histogram)
2130 if tf1 is not None:
2131 tf1.SetName("FitAndStats")
2132 histogram.GetListOfFunctions().Add(tf1)
2133
2134 @classmethod
2135 def delete_tf1(cls, histogram):
2136 """Delete the attached TF1 from the histogram
2137
2138 Parameters
2139 ----------
2140 histogram : ROOT.TH1 or ROOT.TGraph
2141 Something having a GetListOfFunctions method that holds the fit function
2142 """
2143 tf1 = histogram.FindObject("FitAndStats")
2144 if tf1:
2145 function_list = histogram.GetListOfFunctions()
2146 function_list.Remove(tf1)
2147
2148 @classmethod
2149 def create_additional_stats_tf1(cls, histogram):
2150 """Create a TF1 with the additional statistics from the histogram as parameters.
2151
2152 Parameters
2153 ----------
2154 histogram : ROOT.TH1 or ROOT.TGraph
2155 Something having a GetListOfFunctions method that holds the additional statistics.
2156
2157 Returns
2158 -------
2159 ROOT.TF1
2160 Function with the additional statistics as parameters.
2161 """
2162
2163 additional_stats = cls.get_additional_stats(histogram)
2164 if not additional_stats:
2165 return None
2166
2167 # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2168 # Dummy range to serve the functions
2169 lower_bound = 0
2170 upper_bound = 0
2171
2172 # Create a formula which is zero in all cases but has space for n parameters
2173 # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2174 formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2175
2176 # Compose a function that carries the additional information
2177 additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2178
2179 for (i, (label, value)) in enumerate(additional_stats.items()):
2180 # root 6 does not like labels with spaces ..
2181 # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2182 # therefore this workaround:
2183 label = label.replace(" ", "-")
2184 additional_stats_tf1.SetParName(i, label)
2185 additional_stats_tf1.FixParameter(i, value)
2186
2187 return additional_stats_tf1
2188
2189 @classmethod
2190 def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2191 """Combine the fit function and the function carrying the additional statistics to one function.
2192
2193 Parameters
2194 ----------
2195 fit_tf1 : ROOT.TF1
2196 The fit function
2197 additional_stats_tf1 : ROOT.TF1
2198 The function carrying the additional statistics as parameters
2199
2200 Returns
2201 -------
2202 ROOT.TF1
2203 """
2204 if additional_stats_tf1 is None:
2205 return fit_tf1
2206
2207 # Combine both TF1 functions
2208 # Get the lower and upper bound of the fit
2209 # Use the pass-by reference containers from pyROOT to be able to call the function
2210 lower_bound = ctypes.c_double()
2211 upper_bound = ctypes.c_double()
2212 fit_tf1.GetRange(lower_bound, upper_bound)
2213 title = fit_tf1.GetTitle()
2214
2215 combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2216 combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound.value, upper_bound.value)
2217 combined_tf1.SetTitle(title)
2218
2219 # Transfer the fitted parameters
2220 chi2 = fit_tf1.GetChisquare()
2221 combined_tf1.SetChisquare(chi2)
2222
2223 ndf = fit_tf1.GetNDF()
2224 combined_tf1.SetNDF(ndf)
2225
2226 n_stats_parameters = additional_stats_tf1.GetNpar()
2227 # n_fit_parameters = fit_tf1.GetNpar()
2228 cls.copy_tf1_parameters(additional_stats_tf1, combined_tf1)
2229 cls.copy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2230
2231 return combined_tf1
2232
2233 @classmethod
2234 def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2235 """Copy the parameters of one TF1 to another.
2236
2237 Parameters
2238 ----------
2239 tf1_source : ROOT.TF1
2240 Take parameters from here
2241 tf1_target : ROOT.TF1
2242 Copy them to here.
2243 offset : int, optional
2244 Index of the first target parameter to which to copy.
2245 """
2246 n_parameters = tf1_source.GetNpar()
2247
2248 # Helper variables for pyROOT's mechanism to call functions by reference
2249 lower_bound = ctypes.c_double()
2250 upper_bound = ctypes.c_double()
2251
2252 for i_source in range(n_parameters):
2253 parameter_name = tf1_source.GetParName(i_source)
2254 i_target = tf1_target.GetParNumber(parameter_name)
2255
2256 # Workaround for a ROOT bug
2257 if i_target == -1:
2258 for i_target in range(tf1_target.GetNpar()):
2259 if parameter_name == tf1_target.GetParName(i_target):
2260 break
2261 else:
2262 i_target = -1
2263 continue
2264
2265 tf1_target.SetParameter(i_target,
2266 tf1_source.GetParameter(i_source))
2267 tf1_target.SetParError(i_target,
2268 tf1_source.GetParError(i_source))
2269
2270 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2271 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2272
2273 def attach_attributes(self):
2274 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2275 # Forward the attributes to the plot by auto assignment
2276
2277 self.check = self.check
2278
2279 self.contact = self.contact
2280
2281 self.description = self.description
2282
2283
2284 self.xlabel = self.xlabel
2285
2286 self.ylabel = self.ylabel
2287
2288 self.title = self.title
2289
2290 def set_maximum(self, maximum):
2291 """Sets the maximum of the vertical plotable range"""
2292 for histogram in self.histograms:
2293 if isinstance(histogram, ROOT.TH1):
2294 histogram.SetMaximum(histogram.GetMaximum(maximum))
2295 else:
2296 histogram.SetMaximum(maximum)
2297
2298 def set_minimum(self, minimum):
2299 """Sets the minimum of the vertical plotable range"""
2300 for histogram in self.histograms:
2301 if isinstance(histogram, ROOT.TH1):
2302 histogram.SetMinimum(histogram.GetMinimum(minimum))
2303 else:
2304 histogram.SetMinimum(minimum)
2305
2306 @classmethod
2307 def set_tstyle(cls):
2308 """Set the style such that the additional stats entries are shown by the TBrowser"""
2309 belle2_validation_style_name = "belle2_validation_style"
2310 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2311 if not belle2_validation_tstyle:
2312 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2313
2314 opt_fit = 112
2315 belle2_validation_tstyle.SetOptFit(opt_fit)
2316
2317 opt_stat = 111111
2318 belle2_validation_tstyle.SetOptStat(opt_stat)
2319 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2320
2321 # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2322
2323 else:
2324 belle2_validation_tstyle.cd()
2325
2326
2327def test():
2328 """Simple test method"""
2329 ValidationPlot.set_tstyle()
2330
2331 # Test a histogram plot with some nan and inf values
2332 normal_distributed_values = np.random.randn(1000)
2333
2334 for i in range(10):
2335 normal_distributed_values[i] = np.nan
2336
2337 for i in range(10, 20):
2338 normal_distributed_values[i] = np.inf
2339
2340 for i in range(20, 30):
2341 normal_distributed_values[i] = -np.inf
2342
2343 validation_histogram = ValidationPlot('test_hist')
2344 validation_histogram.hist(normal_distributed_values)
2345 validation_histogram.title = 'A normal distribution'
2346 validation_histogram.xlabel = 'normal'
2347 validation_histogram.ylabel = 'frequency'
2348 validation_histogram.fit_gaus()
2349
2350 # Test for a cumulated histogram - cumulation from left to right
2351 cumulated_histogram = ValidationPlot('test_cum_hist')
2352 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2353 cumulated_histogram.title = 'A cumulated normal distribution'
2354 cumulated_histogram.xlabel = 'normal'
2355 cumulated_histogram.ylabel = 'cdf'
2356 cumulated_histogram.show()
2357
2358 # Test stacked histogram
2359 # Make a random selection of 40%
2360 stackby = np.random.binomial(1.0, 0.40, 1000)
2361 stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2362 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2363
2364 # Make a scatterplot with two species.
2365 x = np.random.randn(1000)
2366 y = 3 * np.random.randn(1000)
2367 ones = np.ones_like(x)
2368 angle = np.pi / 2
2369
2370 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2371 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2372
2373 stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2374 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2375
2376 # Make a profile plot with the two species
2377 stacked_validation_profile = ValidationPlot('test_stacked_profile')
2378 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2379
2380 # Make a two dimensional histogram with two species
2381 stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2382 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2383
2384 # Test a profile with a diagonal fit
2385 x = np.linspace(-1, 1, 1000)
2386 y = x.copy()
2387 x[0] = np.nan
2388 diagonal_plot = ValidationPlot('test_diag')
2389 diagonal_plot.profile(x, y, bins=50)
2390 diagonal_plot.fit_line()
2391
2392 # Test if cumulation works for profile plots
2393 cumulated_profile = ValidationPlot('test_cum_profile')
2394 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2395
2396 tfile = ROOT.TFile('test.root', 'RECREATE')
2397
2398 validation_histogram.write(tfile)
2399
2400 with root_cd("expert") as tdirectory1:
2401 diagonal_plot.write(tdirectory1)
2402 cumulated_profile.write(tdirectory1)
2403 cumulated_histogram.write(tdirectory1)
2404
2405 with root_cd("stacked") as tdirectory2:
2406 stacked_validation_histogram.write(tdirectory2)
2407 stacked_validation_scatter.write()
2408 stacked_validation_profile.write()
2409 stacked_validation_hist2d.write()
2410
2411 tfile.Close()
2412
2413 tfile = ROOT.TFile('test.root')
2414 tBrowser = ROOT.TBrowser()
2415 tBrowser.BrowseObject(tfile)
2416 input()
2417 tfile.Close()
2418
2419
2420if __name__ == '__main__':
2421 test()
2422
2423# @endcond