Belle II Software development
plot.py
1#!/usr/bin/env python3
2
3
10
11import sys
12import math
13import collections
14import array
15import numpy as np
16import ctypes
17
18import ROOT
19
20import basf2
21
22from tracking.root_utils import root_cd, root_save_name
23
24from tracking.validation import statistics
25
26import logging
27
28
29def get_logger():
30 """Get for the logging.Logger instance of this module
31
32 Returns
33 -------
34 logging.Logger
35 Logger instance of this module
36 """
37 return logging.getLogger(__name__)
38
39
40
41units_by_quantity_name = {
42 'x': 'cm',
43 'y': 'cm',
44 'z': 'cm',
45 'E': 'GeV',
46 'px': 'GeV',
47 'p_{x}': 'GeV',
48 'py': 'GeV',
49 'p_{y}': 'GeV',
50 'pz': 'GeV',
51 'p_{z}': 'GeV',
52 'pt': 'GeV',
53 'p_{t}': 'GeV',
54 'd0': 'cm',
55 'd_{0}': 'cm',
56 'phi0': None,
57 '#phi_{0}': None,
58 'omega': '1/cm',
59 '#omega': '1/cm',
60 'z0': 'cm',
61 'z_{0}': 'cm',
62 'tan_lambda': None,
63 'tan #lambda': None}
64
65
66def get_unit(quantity_name):
67 """Infers the unit of a quantity from its name.
68
69 Assumes the standard Belle II unit system.
70
71 Currently looks up the quantity string from units_by_quantity_name.
72
73 Parameters
74 ----------
75 quantity_name : str
76 Name of a quantity (E.g. pt, x, ...)
77
78 Returns
79 -------
80 str
81 """
82
83 unit = units_by_quantity_name.get(quantity_name, None)
84 return unit
85
86
87def compose_axis_label(quantity_name, unit=None):
88 """Formats a quantity name and a unit to a label for a plot axes.
89
90 If the unit is not given to is tried to infer it
91 from the quantity name by the get_unit function.
92
93 Parameters
94 ----------
95 quantity_name : str
96 Name of the quantity to be displayed at the axes
97 unit : str, optional
98 The unit of the quantity. Defaults to get_unit(quantity_name)
99
100 Returns
101 -------
102 str
103 """
104
105 if unit is None:
106 unit = get_unit(quantity_name)
107
108 if unit is None:
109 axis_label = quantity_name
110 else:
111 axis_label = f'{quantity_name} ({unit})'
112
113 return axis_label
114
115
116def get1DBinningFromReference(name, refFileName):
117 """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
118
119 @param name : name of the TH1 object to be looked for in the file
120 @param refFileName : name of the reference file where the object is searched for
121
122 @return int nbin, float xmin, float xmax of the TH1
123 """
124
125 nbins = None
126 x_min = None
127 x_max = None
128
129 if refFileName is None or refFileName == "":
130 return nbins, x_min, x_max
131
132 # store current directory to not confuse directories by opening a TFile
133 oldDirectory = ROOT.gROOT.CurrentDirectory().load()
134
135 tfile = ROOT.TFile(refFileName)
136 if tfile.IsOpen():
137 objptr = tfile.Get(name)
138 if objptr and objptr.InheritsFrom("TH1"):
139 nbins = objptr.GetNbinsX()
140 x_min = objptr.GetXaxis().GetXmin()
141 x_max = objptr.GetXaxis().GetXmax()
142 else:
143 basf2.B2WARNING('Requested object with name: ' + name + ' not found in file: ' + refFileName + " (or not a TH1)")
144 else:
145 basf2.B2WARNING('Requested file: ' + refFileName + ' could not be opened')
146
147 tfile.Close()
148
149 # set current directory back to original one
150 oldDirectory.cd()
151
152 return nbins, x_min, x_max
153
154
155
156StatsEntry = ROOT.TParameter(float)
157
158
160
161 """Class for generating a validation plot for the Belle II validation page.
162
163 Typically it generates plots from values stored in numpy arrays and feeds them into
164 plot ROOT classes for storing them.
165
166 It implements an automatic binning procedure based on the rice rule and
167 robust z score outlier detection.
168
169 It also keeps track of additional statistics typically neglected by ROOT such as a count
170 for the non finit values such as NaN, +Inf, -Inf.
171
172 The special attributes for the Belle II validation page like
173 * title
174 * contract
175 * description
176 * check
177 are exposed as properties of this class.
178 """
179
180
181 very_sparse_dots_line_style_index = 28
182 # This line style is defined in the set_tstyle method below.
183
184 def __init__(self, name, referenceFileName=None):
185 """Constructor of the ValidationPlot
186
187 Parameters
188 ----------
189 name : str
190 A unique name to be used as the name of the ROOT object to be generated
191
192 referenceFileName : str
193 name of a reference file. If set the code will try to get the histogram or profile
194 from that file and determine the number of bins and upper and lower bound
195 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
196 """
197
198
199 self.name = root_save_name(name)
200
201
202 self.referenceFileName = referenceFileName
203
204
205 self._description = ''
206
207
208 self._check = ''
209
210
211 self._contact = ''
212
213
214 self._xlabel = ''
215
216
217 self._ylabel = ''
218
219
220 self._title = ''
221
222
223 self.plot = None
224
225
226 self.histograms = []
227
228
229 self._is_expert = True
230
231
232 self.pvalue_warn = None
233
234
235 self.pvalue_error = None
236
237
238 self.y_log = False
239
240 def hist(self,
241 xs,
242 weights=None,
243 stackby=None,
244 bins=None,
245 lower_bound=None,
246 upper_bound=None,
247 outlier_z_score=None,
248 include_exceptionals=True,
249 allow_discrete=False,
250 cumulation_direction=None,
251 is_expert=True):
252 """Fill the plot with a one dimensional histogram."""
253
254 # if referenceFileName was set the binning will taken from there
255 if self.referenceFileName is not None:
256 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
257 if n is not None and xmin is not None and xmax is not None:
258 bins = n
259 upper_bound = xmax
260 lower_bound = xmin
261
262 th1_factory = ROOT.TH1D
263 self._is_expert = is_expert
264
265 self.create_1d(th1_factory,
266 xs,
267 weights=weights,
268 stackby=stackby,
269 bins=bins,
270 lower_bound=lower_bound,
271 upper_bound=upper_bound,
272 outlier_z_score=outlier_z_score,
273 include_exceptionals=include_exceptionals,
274 allow_discrete=allow_discrete,
275 cumulation_direction=cumulation_direction)
276
277 if not self.ylabelylabelylabel:
278
279 self.ylabelylabelylabel = 'count'
280
281 return self
282
283 def profile(self,
284 xs,
285 ys,
286 weights=None,
287 stackby=None,
288 bins=None,
289 lower_bound=None,
290 upper_bound=None,
291 y_binary=None,
292 y_log=None,
293 outlier_z_score=None,
294 include_exceptionals=True,
295 allow_discrete=False,
296 cumulation_direction=None,
297 gaus_z_score=None,
298 is_expert=True,
299 is_asymmetry=False):
300 """Fill the plot with a one dimensional profile of one variable over another."""
301
302 # if referenceFileName was set the binning will taken from there
303 if self.referenceFileName is not None:
304 n = None
305 xmin = None
306 xmax = None
307 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
308 if n is not None and xmin is not None and xmax is not None:
309 bins = n
310 upper_bound = xmax
311 lower_bound = xmin
312
313 th1_factory = ROOT.TProfile
314 self._is_expert = is_expert
315 if gaus_z_score is None:
316 self.create_1d(th1_factory,
317 xs,
318 ys,
319 weights=weights,
320 stackby=stackby,
321 bins=bins,
322 lower_bound=lower_bound,
323 upper_bound=upper_bound,
324 outlier_z_score=outlier_z_score,
325 include_exceptionals=include_exceptionals,
326 allow_discrete=allow_discrete,
327 cumulation_direction=cumulation_direction)
328 else:
329 # Introduce a dummy name for the temporary two dimensional histogram
330 self.name = "_" + self.name
331 self.hist2d(xs, ys=ys, weights=weights, stackby=stackby,
332 bins=(bins, None),
333 lower_bound=(lower_bound, None),
334 upper_bound=(upper_bound, None),
335 outlier_z_score=(outlier_z_score, outlier_z_score),
336 include_exceptionals=(include_exceptionals, True),
337 allow_discrete=(allow_discrete, False),
338 is_expert=is_expert)
339
340 self.name = self.name[1:]
341 profiles = []
342 for histogram in self.histograms:
343 profile = self.gaus_slice_fit(histogram,
344 name=histogram.GetName()[1:],
345 z_score=gaus_z_score)
346 profiles.append(profile)
347 self.histograms = profiles
348 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False)
349
350 if y_log:
351 self.y_log = True
352
353 if y_binary or self.is_binary(ys):
354 if not self.ylabelylabelylabel:
355 self.ylabelylabelylabel = 'probability'
356
357 min_y = 0
358 if (is_asymmetry):
359 min_y = -1.05
360 for histogram in self.histograms:
361 histogram.SetMinimum(min_y)
362 histogram.SetMaximum(1.05)
363
364 self.plot.SetMinimum(min_y)
365 self.plot.SetMaximum(1.05)
366
367 return self
368
369 def scatter(self,
370 xs,
371 ys,
372 stackby=None,
373 lower_bound=(None, None),
374 upper_bound=(None, None),
375 outlier_z_score=(None, None),
376 include_exceptionals=(True, True),
377 max_n_data=100000,
378 is_expert=True):
379 """Fill the plot with a (unbinned) two dimensional scatter plot"""
380
381 self._is_expert = is_expert
382
383 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
384 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
385 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
386 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
387
388 x_lower_bound, x_upper_bound = self.determine_range(
389 xs,
390 lower_bound=x_lower_bound,
391 upper_bound=x_upper_bound,
392 outlier_z_score=x_outlier_z_score,
393 include_exceptionals=x_include_exceptionals
394 )
395
396 y_lower_bound, y_upper_bound = self.determine_range(
397 ys,
398 lower_bound=y_lower_bound,
399 upper_bound=y_upper_bound,
400 outlier_z_score=y_outlier_z_score,
401 include_exceptionals=y_include_exceptionals
402 )
403
404 graph = ROOT.TGraph()
405
406 graph.SetName(self.name)
407 graph.SetMarkerStyle(6)
408 graph.GetHistogram().SetOption("AP") # <-- this has no effect?
409
410 # Trying to make the graph lines invisible
411 color_index = 0 # white
412 # Transparent white
413 graph.SetLineColorAlpha(color_index, 0)
414 graph.SetLineStyle(self.very_sparse_dots_line_style_index)
415
416 # Transport the lower and upper bound as ranges of the axis
417 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
418 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
419
420 self.create(graph,
421 xs=xs,
422 ys=ys,
423 stackby=stackby,
424 reverse_stack=False)
425
426 return self
427
428 def grapherrors(self,
429 xs_and_err,
430 ys_and_err,
431 stackby=None,
432 lower_bound=(None, None),
433 upper_bound=(None, None),
434 outlier_z_score=(None, None),
435 include_exceptionals=(True, True),
436 max_n_data=100000,
437 is_expert=True):
438 """Fill the plot with a (unbinned) two dimensional scatter plot
439 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
440 as numpy arrays
441 """
442
443 self._is_expert = is_expert
444
445 xs = xs_and_err[0]
446 ys = ys_and_err[0]
447
448 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
449 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
450 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
451 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
452
453 x_lower_bound, x_upper_bound = self.determine_range(
454 xs,
455 lower_bound=x_lower_bound,
456 upper_bound=x_upper_bound,
457 outlier_z_score=x_outlier_z_score,
458 include_exceptionals=x_include_exceptionals
459 )
460
461 y_lower_bound, y_upper_bound = self.determine_range(
462 ys,
463 lower_bound=y_lower_bound,
464 upper_bound=y_upper_bound,
465 outlier_z_score=y_outlier_z_score,
466 include_exceptionals=y_include_exceptionals
467 )
468
469 graph = ROOT.TGraphErrors()
470
471 graph.SetName(self.name)
472 graph.GetHistogram().SetOption("A")
473
474 graph.SetMarkerColor(4)
475 graph.SetMarkerStyle(21)
476
477 # Transport the lower and upper bound as ranges of the axis
478 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
479 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
480
481 self.create(graph,
482 xs=xs,
483 ys=ys,
484 stackby=stackby,
485 reverse_stack=False)
486
487 return self
488
489 def hist2d(self,
490 xs,
491 ys,
492 weights=None,
493 stackby=None,
494 bins=(None, None),
495 lower_bound=(None, None),
496 upper_bound=(None, None),
497 outlier_z_score=(None, None),
498 include_exceptionals=(True, True),
499 allow_discrete=(False, False),
500 quantiles=None,
501 is_expert=True):
502 """Fill the plot with a two dimensional histogram"""
503
504 name = self.name
505 # Introduce a dummy name for the temporary two dimensional histogram
506 if quantiles is not None:
507 name = "_" + self.name
508
509 x_bins, y_bins = self.unpack_2d_param(bins)
510 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
511 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
512 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
513 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
514 x_allow_discrete, y_allow_discrete = self.unpack_2d_param(allow_discrete)
515
516 if quantiles is not None:
517 y_include_exceptionals = True
518 y_allow_discrete = False
519
520 x_bin_edges, x_bin_labels = self.determine_bin_edges(xs,
521 stackbys=stackby,
522 bins=x_bins,
523 lower_bound=x_lower_bound,
524 upper_bound=x_upper_bound,
525 outlier_z_score=x_outlier_z_score,
526 include_exceptionals=x_include_exceptionals,
527 allow_discrete=x_allow_discrete)
528
529 y_bin_edges, y_bin_labels = self.determine_bin_edges(ys,
530 stackbys=stackby,
531 bins=y_bins,
532 lower_bound=y_lower_bound,
533 upper_bound=y_upper_bound,
534 outlier_z_score=y_outlier_z_score,
535 include_exceptionals=y_include_exceptionals,
536 allow_discrete=y_allow_discrete)
537
538 n_x_bins = len(x_bin_edges) - 1
539 n_y_bins = len(y_bin_edges) - 1
540
541 self.lower_bound = (x_bin_edges[0], y_bin_edges[0])
542
543 self.upper_bound = (x_bin_edges[-1], y_bin_edges[-1])
544
545 histogram = ROOT.TH2D(name,
546 '',
547 n_x_bins,
548 x_bin_edges,
549 n_y_bins,
550 y_bin_edges)
551
552 if x_bin_labels:
553 get_logger().info("Scatter plot %s is discrete in x.", name)
554 x_taxis = histogram.GetXaxis()
555 for i_x_bin, x_bin_label in enumerate(x_bin_labels):
556 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
557 self.add_stats_entry(histogram, "dx", 1)
558
559 else:
560 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
561 self.add_stats_entry(histogram, "dx", x_bin_width)
562
563 if y_bin_labels:
564 get_logger().info("Scatter plot %s is discrete in y.", name)
565 y_taxis = histogram.GetYaxis()
566 for i_y_bin, y_bin_label in enumerate(y_bin_labels):
567 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
568 self.add_stats_entry(histogram, "dy", 1)
569
570 else:
571 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
572 self.add_stats_entry(histogram, "dy", y_bin_width)
573
574 self.create(histogram, xs, ys=ys, weights=weights, stackby=stackby)
575
576 if quantiles is not None:
577 self.name = self.name[1:]
578 profiles = []
579 for histogram in self.histograms:
580 for quantile in quantiles:
581 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] + '_' + str(quantile))
582
583 # Manually copy labels grumble grumble
584 x_taxis = histogram.GetXaxis()
585 new_x_taxis = profile.GetXaxis()
586 for i_bin in range(x_taxis.GetNbins() + 2):
587 label = x_taxis.GetBinLabel(i_bin)
588 if label != "":
589 new_x_taxis.SetBinLabel(i_bin, label)
590
591 # Remove faulty error values)
592 epsilon = sys.float_info.epsilon
593 for i_bin in range(0, profile.GetNbinsX() + 2):
594 profile.SetBinError(i_bin, epsilon)
595
596 profiles.append(profile)
597
598 self.histograms = profiles
599 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=False, force_graph=True)
600
601 # Adjust the discrete bins after the filling to be equidistant
602 if x_bin_labels:
603 for histogram in self.histograms:
604 x_taxis = histogram.GetXaxis()
605 x_bin_edges = array.array("d", list(range(len(x_bin_labels) + 1)))
606 x_taxis.Set(n_x_bins, x_bin_edges)
607
608 # Adjust the discrete bins after the filling to be equidistant
609 if y_bin_labels:
610 for histogram in self.histograms:
611 x_taxis = histogram.GetXaxis()
612 y_bin_edges = array.array("d", list(range(len(y_bin_labels) + 1)))
613 y_taxis.Set(n_y_bins, y_bin_edges)
614
615 return self
616
617 def fit_gaus(self, z_score=None):
618 """Fit a Gaus bell curve to the central portion of a one dimensional histogram
619
620 The fit is applied to the central mean +- z_score * std interval of the histogram,
621 such that it is less influence by non gaussian tails further away than the given z score.
622
623 @param float z_score number of sigmas to include from the mean value of the histogram.
624 """
625 title = "gaus"
626 formula = "gaus"
627
628 plot = self.plot
629 if plot is None:
630 raise RuntimeError('Validation plot must be filled before it can be fitted.')
631
632 if not isinstance(plot, ROOT.TH1D):
633 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
634
635 histogram = plot
636
637 fit_tf1 = ROOT.TF1("Fit", formula)
638 fit_tf1.SetTitle(title)
639 fit_tf1.SetParName(0, "n")
640 fit_tf1.SetParName(1, "mean")
641 fit_tf1.SetParName(2, "std")
642
643 n = histogram.GetSumOfWeights()
644 mean = histogram.GetMean()
645 std = histogram.GetStdDev()
646
647 fit_tf1.SetParameter(0, n)
648 fit_tf1.SetParameter(1, mean)
649 fit_tf1.SetParameter(2, std)
650
651 fit_options = "LM"
652 return self.fit(fit_tf1,
653 fit_options,
654 z_score=z_score)
655
656 def fit_line(self):
657 """Fit a general line to a one dimensional histogram"""
658 title = "line"
659 formula = "x++1"
660 fit_tf1 = ROOT.TF1("Fit", formula)
661 fit_tf1.SetTitle(title)
662 fit_tf1.SetParName(0, "slope")
663 fit_tf1.SetParName(1, "intercept")
664 self.fit(fit_tf1, 'M')
665
666 def fit_const(self):
667 """Fit a constant function to a one dimensional histogram"""
668 title = "const"
669 formula = "[0]"
670 fit_tf1 = ROOT.TF1("Fit", formula)
671 fit_tf1.SetTitle(title)
672 fit_tf1.SetParName(0, "intercept")
673 self.fit(fit_tf1, 'M')
674
675 def fit_diag(self):
676 """Fit a diagonal line through the origin to a one dimensional histogram"""
677 title = "diag"
678 formula = "[0]*x"
679 fit_tf1 = ROOT.TF1("Fit", formula)
680 fit_tf1.SetTitle(title)
681 fit_tf1.SetParName(0, "slope")
682 self.fit(fit_tf1, 'M')
683
684 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
685 """Fit a user defined function to a one dimensional histogram
686
687 Parameters
688 ----------
689 formula : str or TF1
690 Formula string or TH1 to be fitted. See TF1 constructors for that is a valid formula
691 options : str
692 Options string to be used in the fit. See TH1::Fit()
693 lower_bound : float
694 Lower bound of the range to be fitted
695 upper_bound : float
696 Upper bound of the range to be fitted
697 """
698 plot = self.plot
699 if plot is None:
700 raise RuntimeError('Validation plot must be filled before it can be fitted.')
701
702 if not isinstance(plot, ROOT.TH1D):
703 raise RuntimeError('Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
704
705 histogram = plot
706
707 xaxis = histogram.GetXaxis()
708 n_bins = xaxis.GetNbins()
709 hist_lower_bound = xaxis.GetBinLowEdge(1)
710 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
711
712 if z_score is not None:
713 mean = histogram.GetMean()
714 std = histogram.GetStdDev()
715
716 if lower_bound is None:
717 lower_bound = mean - z_score * std
718
719 if upper_bound is None:
720 upper_bound = mean + z_score * std
721
722 # Setup the plotting range of the function to match the histogram
723 if isinstance(formula, ROOT.TF1):
724 fit_tf1 = formula
725 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
726 else:
727 fit_tf1 = ROOT.TF1("Fit",
728 formula,
729 hist_lower_bound,
730 hist_upper_bound)
731 get_logger().info('Fitting with %s', fit_tf1.GetExpFormula())
732
733 # Determine fit range
734 if lower_bound is None or lower_bound < hist_lower_bound:
735 lower_bound = hist_lower_bound
736 if upper_bound is None or upper_bound > hist_upper_bound:
737 upper_bound = hist_upper_bound
738
739 # Make sure the fitted function is not automatically added since we want to do that one our own.
740 # Look for the documentation of TH1::Fit() for details of the options.
741 if 'N' not in options:
742 options += 'N'
743
744 fit_res = histogram.Fit(fit_tf1, options + "S", "", lower_bound, upper_bound)
745
746 self.set_fit_tf1(histogram, fit_tf1)
747 return fit_res
748
749 def show(self):
750 """Show the plot."""
751 if self.plot:
752 self.plot.Draw()
753 else:
754 raise ValueError("Can not show a validation plot that has not been filled.")
755
756 def write(self, tdirectory=None):
757 """Write the plot to file
758
759 Parameters
760 ----------
761 tdirectory : ROOT.TDirectory, optional
762 ROOT directory to which the plot should be written.
763 If omitted write to the current directory
764 """
765 if not self.plot:
766 raise ValueError("Can not write a validation plot that has not been filled.")
767
768 with root_cd(tdirectory):
769 ValidationPlot.set_tstyle()
770 if self.plot not in self.histograms:
771 self.plot.Write()
772
773 # always disable ROOT's stat plot because it hides items
774 meta_options = ["nostats"]
775
776 # add expert option, if requested
777 if self.is_expert:
778 meta_options.append("expert")
779 else:
780 meta_options.append("shifter")
781
782 # give a custom pvalue warning / error zone if requested
783 if self.pvalue_error is not None:
784 meta_options.append(f"pvalue-error={self.pvalue_error}")
785 if self.pvalue_warn is not None:
786 meta_options.append(f"pvalue-warn={self.pvalue_warn}")
787
788 # Indicator if the y axes should be displayed as a logarithmic scale
789 if self.y_log:
790 meta_options.append("logy")
791
792 meta_options_str = ",".join(meta_options)
793
794 for histogram in self.histograms:
795 histogram.GetListOfFunctions().Add(ROOT.TNamed('MetaOptions', meta_options_str))
796 histogram.Write()
797
798 @property
799 def is_expert(self):
800 """Getter method if an plot plot is marked as expert plot"""
801 return self._is_expert
802
803 @property
804 def title(self):
805 """Getter for the plot title"""
806 return self._title
807
808 @title.setter
809 def title(self, title):
810 """Setter for the plot title"""
811 self._title = title
812 if self.plot:
813 self.plot.SetTitle(title)
814 for histogram in self.histograms:
815 histogram.SetTitle(title)
816
817 @property
818 def xlabel(self):
819 """Getter for the axis label at the x axis"""
820 return self._xlabel
821
822 @xlabel.setter
823 def xlabel(self, xlabel):
824 """Setter for the axis label at the x axis"""
825 self._xlabel = xlabel
826 for histogram in self.histograms:
827 histogram.GetXaxis().SetTitle(xlabel)
828
829 @property
830 def ylabel(self):
831 """Getter for the axis label at the y axis"""
832 return self._ylabel
833
834 @ylabel.setter
835 def ylabel(self, ylabel):
836 """Setter for the axis label at the y axis"""
837 self._ylabel = ylabel
838 for histogram in self.histograms:
839 histogram.GetYaxis().SetTitle(ylabel)
840
841 @property
842 def contact(self):
843 """Getter for the contact email address to be displayed on the validation page"""
844 return self._contact
845
846 @contact.setter
847 def contact(self, contact):
848 """Setter for the contact email address to be displayed on the validation page"""
849 self._contact = contact
850 for histogram in self.histograms:
851 found_obj = histogram.FindObject('Contact')
852 if not found_obj:
853 tnamed = ROOT.TNamed("Contact", contact)
854 histogram.GetListOfFunctions().Add(tnamed)
855 found_obj = histogram.FindObject('Contact')
856 found_obj.SetTitle(contact)
857
858 @property
859 def description(self):
860 """Getter for the description to be displayed on the validation page"""
861 return self._description
862
863 @description.setter
864 def description(self, description):
865 """Setter for the description to be displayed on the validation page"""
866 self._description = description
867 for histogram in self.histograms:
868 found_obj = histogram.FindObject('Description')
869 if not found_obj:
870 tnamed = ROOT.TNamed("Description", description)
871 histogram.GetListOfFunctions().Add(tnamed)
872 found_obj = histogram.FindObject('Description')
873 found_obj.SetTitle(description)
874
875 @property
876 def check(self):
877 """Getter for the check to be displayed on the validation page"""
878 return self._check
879
880 @check.setter
881 def check(self, check):
882 """Setter for the check to be displayed on the validation page"""
883 self._check = check
884 for histogram in self.histograms:
885 found_obj = histogram.FindObject('Check')
886 if not found_obj:
887 tnamed = ROOT.TNamed("Check", check)
888 histogram.GetListOfFunctions().Add(tnamed)
889 found_obj = histogram.FindObject('Check')
890 found_obj.SetTitle(check)
891
892 # Implementation details #
893 # ###################### #
894
895 @staticmethod
896 def unpack_2d_param(param):
897 """Unpacks a function parameter for the two dimensional plots.
898
899 If it is a pair the first parameter shall apply to the x coordinate
900 the second to the y coordinate. In this case the pair is returned as two values
901
902 If something else is given the it is assumed that this parameter should equally apply
903 to both coordinates. In this case the same values is return twice as a pair.
904
905 Parameters
906 ----------
907 param : pair or single value
908 Function parameter for a two dimensional plot
909
910 Returns
911 -------
912 pair
913 A pair of values being the parameter for the x coordinate and
914 the y coordinate respectively
915 """
916 try:
917 if len(param) == 2:
918 x_param, y_param = param
919 except TypeError:
920 x_param = param
921 y_param = param
922 return x_param, y_param
923
924 @staticmethod
925 def is_binary(xs):
926 """Determine if the data consists of boolean values"""
927 return statistics.is_binary_series(xs)
928
929 @staticmethod
930 def is_discrete(xs, max_n_unique=None):
931 """Determine if the data consists of discrete values"""
932 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
933
934 @staticmethod
936 """Find exceptionally frequent values
937
938 Parameters
939 ----------
940 xs : np.array (1d)
941 Data series
942
943 Returns
944 -------
945 np.array (1d)
946 A list of the found exceptional values.
947 """
948 return statistics.rice_exceptional_values(xs)
949
950 @staticmethod
952 """Does an estimation of mean and standard deviation robust against outliers.
953
954 Parameters
955 ----------
956 xs : np.array (1d)
957 Data series
958
959 Returns
960 -------
961 float, float
962 Pair of mean and standard deviation
963 """
964 x_mean = statistics.truncated_mean(xs)
965 x_std = statistics.trimmed_std(xs)
966 return x_mean, x_std
967
968 @staticmethod
970 """Formats a value to be placed at a tick on an axis."""
971 if np.isfinite(value) and value == np.round(value):
972 return str(int(value))
973 else:
974 formated_value = f"{value:.5g}"
975
976 # if the label is to long, switch to shorter "e" format
977 if len(formated_value) > 8:
978 formated_value = f"{value:.3e}"
979 return formated_value
980
981 def create_1d(self,
982 th1_factory,
983 xs,
984 ys=None,
985 weights=None,
986 bins=None,
987 stackby=None,
988 lower_bound=None,
989 upper_bound=None,
990 outlier_z_score=None,
991 include_exceptionals=True,
992 allow_discrete=False,
993 cumulation_direction=None):
994 """Combined factory method for creating a one dimensional histogram or a profile plot."""
995 name = self.name
996
997 # Coerce values to a numpy array. Do not copy if already a numpy array.
998 xs = np.array(xs, copy=False)
999
1000 if ys is not None:
1001 ys = np.array(ys, copy=False)
1002
1003 if weights is not None:
1004 weights = np.array(weights, copy=False)
1005
1006 bin_edges, bin_labels = self.determine_bin_edges(xs,
1007 stackbys=stackby,
1008 bins=bins,
1009 lower_bound=lower_bound,
1010 upper_bound=upper_bound,
1011 outlier_z_score=outlier_z_score,
1012 include_exceptionals=include_exceptionals,
1013 allow_discrete=allow_discrete)
1014
1015 n_bins = len(bin_edges) - 1
1016 self.lower_bound = bin_edges[0]
1017 self.upper_bound = bin_edges[-1]
1018 histogram = th1_factory(name, '', n_bins, bin_edges)
1019
1020 if bin_labels:
1021 get_logger().info("One dimensional plot %s is discrete in x.", name)
1022 x_taxis = histogram.GetXaxis()
1023 for i_bin, bin_label in enumerate(bin_labels):
1024 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1025 self.add_stats_entry(histogram, "dx", 1)
1026
1027 else:
1028 bin_width = bin_edges[1] - bin_edges[0]
1029 self.add_stats_entry(histogram, "dx", bin_width)
1030
1031 self.create(histogram,
1032 xs,
1033 ys=ys,
1034 weights=weights,
1035 stackby=stackby,
1036 cumulation_direction=cumulation_direction,
1037 reverse_stack=True)
1038 # Reverse the stack to have the signal distribution at the bottom
1039
1040 # Adjust the discrete bins after the filling to be equidistant
1041 if bin_labels:
1042 for histogram in self.histograms:
1043 x_taxis = histogram.GetXaxis()
1044 bin_edges = array.array("d", list(range(len(bin_labels) + 1)))
1045 x_taxis.Set(n_bins, bin_edges)
1046
1047 def create(self,
1048 histogram_template,
1049 xs,
1050 ys=None,
1051 weights=None,
1052 stackby=None,
1053 cumulation_direction=None,
1054 reverse_stack=None):
1055 """Create histograms from a template, possibly stacked"""
1056
1057 histograms = []
1058
1059 if stackby is None:
1060 histogram = histogram_template
1061 self.fill_into(histogram, xs, ys, weights=weights)
1062 if cumulation_direction is not None:
1063 histogram = self.cumulate(histogram, cumulation_direction=cumulation_direction)
1064
1065 histograms.append(histogram)
1066 plot = histogram
1067
1068 else:
1069 stackby = np.array(stackby, copy=False)
1070 name = histogram_template.GetName()
1071
1072 histograms = self.fill_into_grouped(histogram_template,
1073 xs,
1074 ys=ys,
1075 weights=weights,
1076 groupbys=stackby,
1077 groupby_label="stack")
1078
1079 if cumulation_direction is not None:
1080 histograms = [self.cumulate(histogram, cumulation_direction=cumulation_direction)
1081 for histogram in histograms]
1082
1083 plot = self.create_stack(histograms, name=name + "_stacked", reverse_stack=reverse_stack)
1084
1085 self.histograms = histograms
1086 self.plot = plot
1087 self.attach_attributes()
1088
1089 @classmethod
1090 def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1091 """Create a stack of histograms"""
1092 if len(histograms) == 1:
1093 plot = histograms[0]
1094 else:
1095 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph)) or force_graph:
1096 plot = ROOT.TMultiGraph()
1097 else:
1098 plot = ROOT.THStack()
1099
1100 plot.SetName(name)
1101
1102 # Add the histogram in reverse order such
1103 # that the signal usually is on the bottom an well visible
1104 if reverse_stack:
1105 for histogram in reversed(histograms):
1106 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1107 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1108 plot.Add(histogram, "APZ")
1109 else:
1110 plot.Add(histogram)
1111 else:
1112 for histogram in histograms:
1113 if isinstance(histogram, ROOT.TProfile) or (isinstance(histogram, ROOT.TH1) and force_graph):
1114 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1115 plot.Add(histogram, "APZ")
1116 else:
1117 plot.Add(histogram)
1118
1119 return plot
1120
1121 @classmethod
1122 def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1123 """Extract errors from a TProfile histogram and create a TGraph from these"""
1124 if isinstance(tprofile, ROOT.TGraph):
1125 return tprofile
1126
1127 x_taxis = tprofile.GetXaxis()
1128 n_bins = x_taxis.GetNbins()
1129
1130 # bin_ids_with_underflow = list(range(n_bins + 1))
1131 bin_ids_without_underflow = list(range(1, n_bins + 1))
1132
1133 bin_centers = np.array([x_taxis.GetBinCenter(i_bin) for i_bin in bin_ids_without_underflow])
1134 if abs_x:
1135 bin_centers = np.abs(bin_centers)
1136 bin_widths = np.array([x_taxis.GetBinWidth(i_bin) for i_bin in bin_ids_without_underflow])
1137 bin_x_errors = bin_widths / 2.0
1138
1139 # Now for the histogram content not including the underflow
1140 bin_contents = np.array([tprofile.GetBinContent(i_bin) for i_bin in bin_ids_without_underflow])
1141 bin_y_errors = np.array([tprofile.GetBinError(i_bin) for i_bin in bin_ids_without_underflow])
1142
1143 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1144
1145 tgrapherrors.GetHistogram().SetOption("APZ") # <-- This has no effect?
1146
1147 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1148 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1149
1150 # Copy all functions and stats entries
1151 for tobject in tprofile.GetListOfFunctions():
1152 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1153
1154 # Add stats entries that are displayed for profile plots
1155 cls.add_stats_entry(tgrapherrors, 'count', tprofile.GetEntries())
1156
1157 stats_values = np.array([np.nan] * 6)
1158 tprofile.GetStats(stats_values)
1159
1160 sum_w = stats_values[0]
1161 # sum_w2 = stats_values[1]
1162 sum_wx = stats_values[2]
1163 sum_wx2 = stats_values[3]
1164 sum_wy = stats_values[4]
1165 sum_wy2 = stats_values[5]
1166
1167 cls.add_stats_entry(tgrapherrors,
1168 "x avg",
1169 sum_wx / sum_w)
1170
1171 cls.add_stats_entry(tgrapherrors,
1172 "x std",
1173 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1174
1175 cls.add_stats_entry(tgrapherrors,
1176 "y avg",
1177 sum_wy / sum_w)
1178
1179 cls.add_stats_entry(tgrapherrors,
1180 "y std",
1181 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1182
1183 cls.add_stats_entry(tgrapherrors,
1184 'cov',
1185 tgrapherrors.GetCovariance())
1186
1187 cls.add_stats_entry(tgrapherrors,
1188 'corr',
1189 tgrapherrors.GetCorrelationFactor())
1190
1191 return tgrapherrors
1192
1194 histogram_template,
1195 xs,
1196 ys=None,
1197 weights=None,
1198 groupbys=None,
1199 groupby_label="group"):
1200 """Fill data into similar histograms in groups indicated by a groupby array"""
1201
1202 histograms = []
1203 unique_groupbys = np.unique(groupbys)
1204 name = histogram_template.GetName()
1205
1206 for i_value, value in enumerate(unique_groupbys):
1207 if np.isnan(value):
1208 indices_for_value = np.isnan(groupbys)
1209 else:
1210 indices_for_value = groupbys == value
1211
1212 # Make a copy of the empty histogram
1213 histogram_for_value = histogram_template.Clone(name + '_' + str(value))
1214 i_root_color = i_value + 1
1215
1216 self.set_color(histogram_for_value, i_root_color)
1217
1218 if groupby_label:
1219 self.add_stats_entry(histogram_for_value, groupby_label, value)
1220
1221 self.fill_into(histogram_for_value,
1222 xs,
1223 ys=ys,
1224 weights=weights,
1225 filter=indices_for_value)
1226
1227 histograms.append(histogram_for_value)
1228
1229 return histograms
1230
1231 def set_color(self, tobject, root_i_color):
1232 """Set the color of the ROOT object.
1233
1234 By default the line color of a TGraph should be invisible, so do not change it
1235 For other objects set the marker and the line color
1236
1237 Parameters
1238 ----------
1239 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1240 Object of which the color should be set.
1241 root_i_color : int
1242 Color index of the ROOT color table
1243 """
1244 if isinstance(tobject, ROOT.TGraph):
1245 tobject.SetMarkerColor(root_i_color)
1246 else:
1247 tobject.SetLineColor(root_i_color)
1248 tobject.SetMarkerColor(root_i_color)
1249
1250 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1251 """Fill the data into the plot object"""
1252 if isinstance(plot, ROOT.TGraph):
1253 if ys is None:
1254 raise ValueError("ys are required for filling a graph")
1255 self.fill_into_tgraph(plot, xs, ys, filter=filter)
1256 elif isinstance(plot, ROOT.TGraphErrors):
1257 if ys is None:
1258 raise ValueError("ys are required for filling a graph error")
1259
1260 self.fill_into_tgrapherror(plot, xs, ys)
1261 else:
1262 self.fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1263
1264 def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1265 """fill point values and error of the x and y axis into the graph"""
1266
1267 assert (len(xs[0]) == len(ys[0]))
1268 # set the overall amount of points
1269 graph.Set(len(xs[0]))
1270
1271 for i in range(len(xs[0])):
1272 graph.SetPoint(i, xs[0][i], ys[0][i])
1273 graph.SetPointError(i, xs[1][i], ys[1][i])
1274
1275 def fill_into_tgraph(self, graph, xs, ys, filter=None):
1276 """Fill the data into a TGraph"""
1277
1278 # Save some ifs by introducing a dummy slicing as a non filter
1279 if filter is None:
1280 filter = slice(None)
1281
1282 xs = xs[filter]
1283 ys = ys[filter]
1284
1285 max_n_data = 100000
1286 x_n_data = len(xs)
1287 y_n_data = len(ys)
1288
1289 if max_n_data:
1290 if x_n_data > max_n_data or y_n_data > max_n_data:
1291 get_logger().warning(f"Number of points in scatter graph {self.name} exceed limit {max_n_data}")
1292
1293 get_logger().warning(f"Cropping {max_n_data}")
1294
1295 xs = xs[0:max_n_data]
1296 ys = ys[0:max_n_data]
1297
1298 x_axis = graph.GetXaxis()
1299 y_axis = graph.GetYaxis()
1300
1301 x_lower_bound = x_axis.GetXmin()
1302 x_upper_bound = x_axis.GetXmax()
1303
1304 y_lower_bound = y_axis.GetXmin()
1305 y_upper_bound = y_axis.GetXmax()
1306
1307 x_underflow_indices = xs < x_lower_bound
1308 x_overflow_indices = xs > x_upper_bound
1309
1310 y_underflow_indices = ys < y_lower_bound
1311 y_overflow_indices = ys > y_upper_bound
1312
1313 plot_indices = ~(np.isnan(xs) |
1314 x_underflow_indices |
1315 x_overflow_indices |
1316 np.isnan(ys) |
1317 y_underflow_indices |
1318 y_overflow_indices)
1319
1320 n_plot_data = np.sum(plot_indices)
1321 plot_xs = xs[plot_indices]
1322 plot_ys = ys[plot_indices]
1323
1324 graph.Set(int(n_plot_data))
1325 for i, (x, y) in enumerate(zip(plot_xs, plot_ys)):
1326 graph.SetPoint(i, x, y)
1327
1328 self.add_stats_entry(graph, 'count', np.sum(np.isfinite(xs)))
1329
1330 self.add_nan_inf_stats(graph, 'x', xs)
1331 self.add_nan_inf_stats(graph, 'y', ys)
1332
1333 x_n_underflow = np.sum(x_underflow_indices)
1334 if x_n_underflow:
1335 self.add_stats_entry(graph, 'x underf.', x_n_underflow)
1336
1337 x_n_overflow = np.sum(x_overflow_indices)
1338 if x_n_overflow:
1339 self.add_stats_entry(graph, 'x overf.', x_n_overflow)
1340
1341 y_n_underflow = np.sum(y_underflow_indices)
1342 if y_n_underflow:
1343 self.add_stats_entry(graph, 'y underf.', y_n_underflow)
1344
1345 y_n_overflow = np.sum(y_overflow_indices)
1346 if y_n_overflow:
1347 self.add_stats_entry(graph, 'y overf.', y_n_overflow)
1348
1349 self.add_stats_entry(graph, 'x avg', graph.GetMean(1))
1350 self.add_stats_entry(graph, 'x std', graph.GetRMS(1))
1351
1352 self.add_stats_entry(graph, 'y avg', graph.GetMean(2))
1353 self.add_stats_entry(graph, 'y std', graph.GetRMS(2))
1354
1355 self.add_stats_entry(graph, 'cov', graph.GetCovariance())
1356 self.add_stats_entry(graph, 'corr', graph.GetCorrelationFactor())
1357
1358 def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1359 """Fill the histogram blocking non finite values
1360
1361 Parameters
1362 ----------
1363 histogram : ROOT.TH1
1364 The histogram to be filled
1365 xs : numpy.ndarray (1d)
1366 Data for the first axes
1367 ys : numpy.ndarray (1d), optional
1368 Data for the second axes
1369 weights : numpy.ndarray (1d), optional
1370 Weight of the individual points. Defaults to one for each
1371 filter : numpy.ndarray, optional
1372 Boolean index array indicating which entries shall be taken.
1373 """
1374 # Save some ifs by introducing a dummy slicing as a non filter
1375 if filter is None:
1376 filter = slice(None)
1377
1378 xs = xs[filter]
1379 # Count the nan and inf values in x
1380 self.add_nan_inf_stats(histogram, 'x', xs)
1381 finite_filter = np.isfinite(xs)
1382
1383 if ys is not None:
1384 ys = ys[filter]
1385 # Count the nan and inf values in y
1386 self.add_nan_inf_stats(histogram, 'y', ys)
1387 finite_filter &= np.isfinite(ys)
1388
1389 if weights is None:
1390 xs = xs[finite_filter]
1391 weights = np.ones_like(xs)
1392 else:
1393 weights = weights[filter]
1394 self.add_nan_inf_stats(histogram, 'w', weights)
1395 finite_filter &= np.isfinite(weights)
1396 xs = xs[finite_filter]
1397 weights[finite_filter]
1398
1399 if ys is not None:
1400 ys = ys[finite_filter]
1401
1402 # Now fill the actual histogram
1403 try:
1404 histogram.FillN
1405 except AttributeError:
1406 Fill = histogram.Fill
1407 if ys is None:
1408 fill = np.frompyfunc(Fill, 2, 1)
1409 fill(xs.astype(np.float64, copy=False),
1410 weights.astype(np.float64, copy=False))
1411 else:
1412 fill = np.frompyfunc(Fill, 3, 1)
1413 fill(xs.astype(np.float64, copy=False),
1414 ys.astype(np.float64, copy=False),
1415 weights.astype(np.float64, copy=False))
1416 else:
1417 if ys is None:
1418 # Make the array types compatible with the ROOT interface if necessary
1419 xs = xs.astype(np.float64, copy=False)
1420 weights = weights.astype(np.float64, copy=False)
1421 n = len(xs)
1422 if n != 0:
1423 histogram.FillN(n, xs, weights)
1424 else:
1425 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1426
1427 else:
1428 # Make the array types compatible with the ROOT interface if necessary
1429 xs = xs.astype(np.float64, copy=False)
1430 ys = ys.astype(np.float64, copy=False)
1431 weights = weights.astype(np.float64, copy=False)
1432 n = len(xs)
1433 if n != 0:
1434 histogram.FillN(n, xs, ys, weights)
1435 else:
1436 basf2.B2WARNING("No values to be filled into histogram: " + self.name)
1437
1438 self.set_additional_stats_tf1(histogram)
1439
1440 @classmethod
1441 def add_nan_inf_stats(cls, histogram, name, xs):
1442 """ Extracts the counts of non finite floats from a series
1443 and adds them as additional statistics to the histogram.
1444
1445 Parameters
1446 ----------
1447 histogram : derived from ROOT.TH1 or ROOT.TGraph
1448 Something having a GetListOfFunctions method that
1449 name : str
1450 A label for the data series to be prefixed to the entries.
1451 xs : numpy.ndarray (1d)
1452 Data from which the non finit floats should be counted.
1453 """
1454 n_nans = np.isnan(xs).sum()
1455 if n_nans > 0:
1456 cls.add_stats_entry(histogram, name + ' nan', n_nans)
1457
1458 n_positive_inf = np.sum(xs == np.inf)
1459 if n_positive_inf > 0:
1460 cls.add_stats_entry(histogram, name + ' pos inf', n_positive_inf)
1461
1462 n_negative_inf = np.sum(xs == -np.inf)
1463 if n_negative_inf > 0:
1464 cls.add_stats_entry(histogram, name + ' neg inf', n_negative_inf)
1465
1466 @classmethod
1467 def add_stats_entry(cls, histogram, label, value):
1468 """Add a new additional statistics to the histogram.
1469
1470 Parameters
1471 ----------
1472 histogram : derived from ROOT.TH1 or ROOT.TGraph
1473 Something having a GetListOfFunctions method that holds the additional statistics
1474 label : str
1475 Label of the statistic
1476 value : float
1477 Value of the statistic
1478 """
1479 stats_entry = StatsEntry(str(label), float(value))
1480 histogram.GetListOfFunctions().Add(stats_entry)
1481 cls.set_additional_stats_tf1(histogram)
1482
1483 @classmethod
1484 def get_additional_stats(cls, histogram):
1485 """Get the additional statistics from the histogram and return them a dict.
1486
1487 Parameters
1488 ----------
1489 histogram : derived from ROOT.TH1 or ROOT.TGraph
1490 Something having a GetListOfFunctions method that holds the additional statistics
1491
1492 Returns
1493 -------
1494 collection.OrderedDict
1495 A map of labels to values for the additional statistics
1496 """
1497 additional_stats = collections.OrderedDict()
1498 for tobject in histogram.GetListOfFunctions():
1499 if isinstance(tobject, StatsEntry):
1500 stats_entry = tobject
1501 label = stats_entry.GetName()
1502 value = stats_entry.GetVal()
1503 additional_stats[label] = value
1504 return additional_stats
1505
1506 @classmethod
1507 def gaus_slice_fit(cls, th2, name, z_score=None):
1508 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1509 # profile = th2.ProfileX(name)
1510
1511 y_taxis = th2.GetYaxis()
1512 th2_lower_bound = y_taxis.GetXmin()
1513 th2_upper_bound = y_taxis.GetXmax()
1514 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1515 n_y_bins = y_taxis.GetNbins()
1516 if z_score:
1517 y_mean = th2.GetMean(2)
1518 y_std = th2.GetStdDev(2)
1519 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1520 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1521 fit_height = fit_upper_bound - fit_lower_bound
1522 # Require all covered bins to be filled
1523 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1524 else:
1525 fit_lower_bound = th2_lower_bound
1526 fit_upper_bound = th2_upper_bound
1527 fit_height = fit_upper_bound - fit_lower_bound
1528 required_n_bins_inslice_filled = n_y_bins / 1.61
1529
1530 # Highest required number of bins should be a third
1531 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1532
1533 fit_tf1 = ROOT.TF1("Fit", "gaus", fit_lower_bound, fit_upper_bound)
1534 fit_tf1.SetParName(0, "n")
1535 fit_tf1.SetParName(1, "mean")
1536 fit_tf1.SetParName(2, "std")
1537 i_first_bin = 0
1538 i_last_bin = -1
1539 fit_options = "QNR"
1540 param_fit_th1s = ROOT.TObjArray()
1541 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1542 int(required_n_bins_inslice_filled),
1543 fit_options, param_fit_th1s)
1544
1545 th1_means = param_fit_th1s.At(1)
1546 th1_means.SetName(name)
1547 th1_means.SetTitle(th2.GetTitle())
1548 for label, value in cls.get_additional_stats(th2).items():
1549 cls.add_stats_entry(th1_means, label, value)
1550
1551 # Manually copy labels grumble grumble
1552 x_taxis = th2.GetXaxis()
1553 new_x_taxis = th1_means.GetXaxis()
1554 for i_bin in range(x_taxis.GetNbins() + 2):
1555 label = x_taxis.GetBinLabel(i_bin)
1556 if label != "":
1557 new_x_taxis.SetBinLabel(i_bin, label)
1558
1559 # Adjust plot bound to reflect the fit range.
1560 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1561 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1562 data_height = data_upper_bound - data_lower_bound
1563
1564 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1565 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1566
1567 th1_means.SetMinimum(plot_lower_bound)
1568 th1_means.SetMaximum(plot_upper_bound)
1569
1570 return th1_means
1571
1572 @classmethod
1573 def cumulate(cls, histogram, cumulation_direction=None):
1574 """Cumulates the histogram inplace.
1575
1576 Parameters
1577 ----------
1578 histogram : ROOT.TH1 or ROOT.TProfile
1579 Filled histogram to be cumulated
1580 cumulation_direction : int, optional
1581 Direction is indicated by the sign.
1582 Positive means from left to right, negative means from right to left.
1583 If now cumulation direction is given return the histogram as is.
1584
1585 Returns
1586 -------
1587 ROOT.TH1
1588 Cumulated histogram potentially altered inplace.
1589 """
1590 if not cumulation_direction:
1591 return histogram
1592
1593 cumulate_backward = cumulation_direction < 0
1594 cumulate_forward = not cumulate_backward
1595
1596 if isinstance(histogram, ROOT.TH2):
1597 raise ValueError("Cannot cumulate a two dimensional histogram.")
1598
1599 if isinstance(histogram, ROOT.TH3):
1600 raise ValueError("Cannot cumulate a three dimensional histogram.")
1601
1602 if not isinstance(histogram, ROOT.TH1):
1603 raise ValueError("Can only cumulate a one dimensional histogram.")
1604
1605 if isinstance(histogram, ROOT.TProfile):
1606 tprofile = histogram
1607 # Turn the profile histogram into graph where we can set the new content and errors
1608 tgraph = cls.convert_tprofile_to_tgrapherrors(histogram)
1609 tgraph.SetName(tprofile.GetName())
1610
1611 n_bins = histogram.GetNbinsX()
1612 n_points = n_bins
1613 cumulated_content = 0.0
1614 cumulated_entries = 0
1615 cumulated_std = 0.0
1616
1617 # Always include the overflow bins.
1618 i_bins = list(range(0, n_bins + 2))
1619 if not cumulate_forward:
1620 i_bins = reversed(i_bins)
1621
1622 for i_bin in i_bins:
1623 i_point = i_bin - 1
1624 bin_content = tprofile.GetBinContent(i_bin)
1625 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1626 bin_std = tprofile.GetBinError(i_bin)
1627
1628 if bin_entries != 0:
1629 cumulated_content = (
1630 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1631 (cumulated_entries + bin_entries)
1632 )
1633
1634 cumulated_std = (
1635 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1636 (cumulated_entries + bin_entries)
1637 )
1638
1639 cumulated_entries = cumulated_entries + bin_entries
1640 else:
1641 # empty bin does not add anything to the cumulation
1642 pass
1643
1644 if i_point >= 0 and i_point < n_points:
1645 x = tgraph.GetX()[i_point]
1646 # x = ROOT.Double()
1647 # y = ROOT.Double()
1648 # tgraph.GetPoint(i_point, x, y)
1649 tgraph.SetPoint(i_point, x, cumulated_content)
1650
1651 x_error = tgraph.GetErrorX(i_point)
1652 tgraph.SetPointError(i_point, x_error, cumulated_std)
1653 return tgraph
1654
1655 else:
1656 # Always include the overflow bins.
1657 n_bins = histogram.GetNbinsX()
1658 cumulated_content = 0.0
1659
1660 i_bins = list(range(0, n_bins + 2))
1661 if not cumulate_forward:
1662 i_bins = reversed(i_bins)
1663
1664 for i_bin in i_bins:
1665 bin_content = histogram.GetBinContent(i_bin)
1666 cumulated_content += bin_content
1667 histogram.SetBinContent(i_bin, cumulated_content)
1668
1669 return histogram
1670
1672 xs,
1673 stackbys=None,
1674 bins=None,
1675 lower_bound=None,
1676 upper_bound=None,
1677 outlier_z_score=None,
1678 include_exceptionals=True,
1679 allow_discrete=False):
1680 """Deducing bin edges from a data series.
1681
1682 Parameters
1683 ----------
1684 xs : numpy.ndarray (1d)
1685 Data point for which a binning should be found.
1686 stackbys : numpy.ndarray (1d)
1687 Categories of the data points to be distinguishable
1688 bins : list(float) or int or None, optional
1689 Preset bin edges or preset number of desired bins.
1690 The default, None, means the bound should be extracted from data.
1691 The rice rule is used the determine the number of bins.
1692 If a list of floats is given return them immediately.
1693 lower_bound : float or None, optional
1694 Preset lower bound of the binning range.
1695 The default, None, means the bound should be extracted from data.
1696 upper_bound : float or None, optional
1697 Preset upper bound of the binning range.
1698 The default, None, means the bound should be extracted from data.
1699 outlier_z_score : float or None, optional
1700 Threshold z-score of outlier detection.
1701 The default, None, means no outlier detection.
1702 include_exceptionals : bool, optional
1703 If the outlier detection is active this switch indicates,
1704 if values detected as exceptionally frequent shall be included
1705 nevertheless into the binning range. Default is True,
1706 which means exceptionally frequent values as included
1707 even if they are detected as outliers.
1708
1709 Returns
1710 -------
1711 np.array (1d), list(str)
1712 Pair of bin edges and labels deduced from the series.
1713 Second element is None if the series is not detected as discrete.
1714 """
1715 debug = get_logger().debug
1716 debug('Determine binning for plot named %s', self.name)
1717
1718 if bins == 'flat':
1719 # Special value for the flat distribution binning
1720 n_bins = None
1721
1722 elif isinstance(bins, collections.abc.Iterable):
1723 # Bins is considered as an array
1724 # Construct a float array forwardable to root.
1725 bin_edges = bins
1726 bin_edges = array.array('d', bin_edges)
1727 bin_labels = None
1728 return bin_edges, bin_labels
1729
1730 # If bins is not an iterable assume it is the number of bins or None
1731 elif bins is None:
1732 n_bins = None
1733 else:
1734 # Check that bins can be coerced to an integer.
1735 n_bins = int(bins)
1736
1737 # Do not allow negative bin numbers
1738 if not n_bins > 0:
1739 message = f'Cannot accept n_bins={bins} as number of bins, because it is not a number greater than 0.'
1740 raise ValueError(message)
1741
1742 # Coerce values to a numpy array. Do not copy if already a numpy array.
1743 xs = np.array(xs, copy=False)
1744
1745 if self.is_binary(xs) or (allow_discrete and self.is_discrete(xs)):
1746 # This covers also the case
1747 debug('Discrete binning values encountered')
1748 finite_xs = xs[np.isfinite(xs)]
1749 unique_xs = np.unique(finite_xs)
1750
1751 # Crop the unique values between the lower and upper bound
1752 if lower_bound is None:
1753 if len(unique_xs) == 0:
1754 if upper_bound is None:
1755 lower_bound = 0
1756 else:
1757 lower_bound = upper_bound - 1
1758 else:
1759 lower_bound = np.min(unique_xs)
1760 else:
1761 unique_xs = unique_xs[unique_xs >= lower_bound]
1762
1763 if upper_bound is None:
1764 if len(unique_xs) == 0:
1765 upper_bound = lower_bound + 1
1766 else:
1767 upper_bound = np.min(unique_xs)
1768 else:
1769 unique_xs = unique_xs[unique_xs <= upper_bound]
1770
1771 if n_bins is None:
1772 n_bins = len(unique_xs) or 1
1773
1774 if len(unique_xs) > 0 and n_bins >= len(unique_xs):
1775 # Construct a float array forwardable to root.
1776 bin_edges = array.array('d', unique_xs)
1777 format_bin_label = self.format_bin_label
1778 bin_labels = [format_bin_label(value) for value in bin_edges]
1779 bin_edges.append(bin_edges[-1] + 1)
1780 return bin_edges, bin_labels
1781
1782 else:
1783 # Ambiguous case what to do in case of a number of requested bins
1784 # that is lower than the number of unique values?
1785
1786 # Continue with an equistant binning for now.
1787 pass
1788
1789 debug('Lower bound %s', lower_bound)
1790 debug('Upper bound %s', upper_bound)
1791 debug('N bins %s', n_bins)
1792
1793 else:
1794 bin_range = self.determine_bin_range(xs,
1795 stackbys=stackbys,
1796 n_bins=n_bins,
1797 lower_bound=lower_bound,
1798 upper_bound=upper_bound,
1799 outlier_z_score=outlier_z_score,
1800 include_exceptionals=include_exceptionals)
1801
1802 n_bins, lower_bound, upper_bound = bin_range
1803
1804 n_bin_edges = n_bins + 1
1805 if lower_bound != upper_bound:
1806 if bins == "flat":
1807 debug("Creating flat distribution binning")
1808 percentiles = np.linspace(0.0, 100.0, n_bin_edges)
1809 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], percentiles))
1810 else:
1811 # Correct the upper bound such that all values are strictly smaller than the upper bound
1812 # Make one step in single precision in the positive direction
1813 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1814
1815 # Reinforce the upper and lower bound to be exact
1816 # Also expand the upper bound by an epsilon
1817 # to prevent the highest value in xs from going in the overflow bin
1818 bin_edges[0] = lower_bound
1819 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1820 debug('Bins %s', bin_edges)
1821
1822 else:
1823 # Fall back if the array contains only one value
1824 bin_edges = [lower_bound, upper_bound + 1]
1825
1826 # Construct a float array forwardable to root.
1827 bin_edges = array.array('d', bin_edges)
1828 debug('Bins %s for %s', bin_edges, self.name)
1829 return bin_edges, None
1830
1832 xs,
1833 stackbys=None,
1834 n_bins=None,
1835 lower_bound=None,
1836 upper_bound=None,
1837 outlier_z_score=None,
1838 include_exceptionals=True):
1839 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1840 estimating the values that are not given.
1841
1842 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1843 The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1844 mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1845 quantile range (IQR).
1846
1847 If additional include_exceptionals is true the method tries to find exceptional values in the series
1848 and always include them in the range if it finds any.
1849 Exceptional values means exact values that appear often in the series for whatever reason.
1850 Possible reasons include
1851 * Integral / default values
1852 * Failed evaluation conditions
1853 * etc.
1854 which should be not cropped away automatically if you are locking on the quality of your data.
1855
1856 Parameters
1857 ----------
1858 xs : numpy.ndarray (1d)
1859 Data point for which a binning should be found.
1860 stackbys : numpy.ndarray (1d)
1861 Categories of the data points to be distinguishable
1862 n_bins : int or None, optional
1863 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1864 The rice rule is used the determine the number of bins.
1865 lower_bound : float or None, optional
1866 Preset lower bound of the binning range.
1867 The default, None, means the bound should be extracted from data.
1868 upper_bound : float or None, optional
1869 Preset upper bound of the binning range.
1870 The default, None, means the bound should be extracted from data.
1871 outlier_z_score : float or None, optional
1872 Threshold z-score of outlier detection.
1873 The default, None, means no outlier detection.
1874 include_exceptionals : bool, optional
1875 If the outlier detection is active this switch indicates,
1876 if values detected as exceptionally frequent shall be included
1877 nevertheless into the binning range. Default is True,
1878 which means exceptionally frequent values as included
1879 even if they are detected as outliers.
1880
1881 Returns
1882 -------
1883 n_bins, lower_bound, upper_bound : int, float, float
1884 A triple of found number of bins, lower bound and upper bound of the binning range.
1885 """
1886
1887 if stackbys is not None:
1888 unique_stackbys = np.unique(stackbys)
1889 stack_ranges = []
1890 for value in unique_stackbys:
1891 if np.isnan(value):
1892 indices_for_value = np.isnan(stackbys)
1893 else:
1894 indices_for_value = stackbys == value
1895
1896 stack_lower_bound, stack_upper_bound = \
1897 self.determine_range(xs[indices_for_value],
1898 lower_bound=lower_bound,
1899 upper_bound=upper_bound,
1900 outlier_z_score=outlier_z_score,
1901 include_exceptionals=include_exceptionals)
1902
1903 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1904
1905 lower_bound = np.nanmin([lwb for lwb, upb in stack_ranges])
1906 upper_bound = np.nanmax([upb for lwb, upb in stack_ranges])
1907
1908 else:
1909 lower_bound, upper_bound = self.determine_range(xs,
1910 lower_bound=lower_bound,
1911 upper_bound=upper_bound,
1912 outlier_z_score=outlier_z_score,
1913 include_exceptionals=include_exceptionals)
1914
1915 if n_bins is None:
1916 # Assume number of bins according to the rice rule.
1917 # The number of data points should not include outliers.
1918 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1919 rice_n_bins = int(statistics.rice_n_bin(n_data))
1920 n_bins = rice_n_bins
1921
1922 else:
1923 n_bins = int(n_bins)
1924 # Do not allow negative bin numbers
1925 if not n_bins > 0:
1926 message = f'Cannot accept n_bins={n_bins} as number of bins, because it is not a number greater than 0.'
1927 raise ValueError(message)
1928
1929 return n_bins, lower_bound, upper_bound
1930
1932 xs,
1933 lower_bound=None,
1934 upper_bound=None,
1935 outlier_z_score=None,
1936 include_exceptionals=True):
1937 """
1938 Parameters
1939 ----------
1940 xs : numpy.ndarray (1d)
1941 Data point for which a binning should be found.
1942 lower_bound : float or None, optional
1943 Preset lower bound of the binning range.
1944 The default, None, means the bound should be extracted from data.
1945 upper_bound : float or None, optional
1946 Preset upper bound of the binning range.
1947 The default, None, means the bound should be extracted from data.
1948 outlier_z_score : float or None, optional
1949 Threshold z-score of outlier detection.
1950 The default, None, means no outlier detection.
1951 include_exceptionals : bool, optional
1952 If the outlier detection is active this switch indicates,
1953 if values detected as exceptionally frequent shall be included
1954 nevertheless into the binning range. Default is True,
1955 which means exceptionally frequent values as included
1956 even if they are detected as outliers.
1957
1958 Returns
1959 -------
1960 lower_bound, upper_bound : float, float
1961 A pair of found lower bound and upper bound of series.
1962 """
1963 debug = get_logger().debug
1964
1965 finite_xs_indices = np.isfinite(xs)
1966 if np.any(finite_xs_indices):
1967 finite_xs = xs[finite_xs_indices]
1968 else:
1969 finite_xs = xs
1970
1971 make_symmetric = False
1972 exclude_outliers = outlier_z_score is not None and (lower_bound is None or upper_bound is None)
1973
1974 # Look for exceptionally frequent values in the series, e.g. integral delta values like -999
1975 if include_exceptionals or exclude_outliers:
1976 exceptional_xs = self.get_exceptional_values(finite_xs)
1977 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1978
1979 # Prepare for the estimation of outliers
1980 if exclude_outliers:
1981 if not np.all(exceptional_indices):
1982 # Exclude exceptional values from the estimation to be unbiased
1983 # even in case exceptional values fall into the central region near the mean
1984 x_mean, x_std = self.get_robust_mean_and_std(finite_xs[~exceptional_indices])
1985 else:
1986 x_mean, x_std = np.nan, np.nan
1987
1988 make_symmetric = abs(x_mean) < x_std / 5.0 and lower_bound is None and upper_bound is None
1989
1990 if include_exceptionals and len(exceptional_xs) != 0:
1991 lower_exceptional_x = np.min(exceptional_xs)
1992 upper_exceptional_x = np.max(exceptional_xs)
1993 make_symmetric = False
1994 else:
1995 lower_exceptional_x = np.nan
1996 upper_exceptional_x = np.nan
1997
1998 # Find the lower bound, if it is not given.
1999 if lower_bound is None:
2000 try:
2001 lower_bound = np.min(finite_xs)
2002 except ValueError:
2003 lower_bound = -999
2004 # Clip the lower bound by outliers that exceed the given z score
2005 if outlier_z_score is not None:
2006 # The lower bound at which outliers exceed the given z score
2007 lower_outlier_bound = x_mean - outlier_z_score * x_std
2008
2009 # Clip the lower bound such that it concides with an actual value,
2010 # which prevents empty bins from being produced
2011 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2012
2013 if np.any(indices_above_lower_outlier_bound):
2014 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2015
2016 # However we want to include at least the exceptional values in the range if there are any.
2017 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2018
2019 debug('Lower bound after outlier detection')
2020 debug('Lower bound %s', lower_bound)
2021 debug('Lower outlier bound %s', lower_outlier_bound)
2022
2023 # Find the upper bound, if it is not given
2024 if upper_bound is None:
2025 try:
2026 upper_bound = np.max(finite_xs)
2027 except ValueError:
2028 upper_bound = 999
2029 if outlier_z_score is not None:
2030 # The upper bound at which outliers exceed the given z score
2031 upper_outlier_bound = x_mean + outlier_z_score * x_std
2032
2033 # Clip the upper bound such that it concides with an actual value,
2034 # which prevents empty bins from being produced
2035 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2036
2037 if np.any(indices_below_upper_outlier_bound):
2038 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2039
2040 # However we want to include at least the exceptional values in the range if there are any.
2041 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2042
2043 debug('Upper bound after outlier detection')
2044 debug('Upper bound %s', upper_bound)
2045 debug('Upper outlier bound %s', upper_outlier_bound)
2046
2047 if make_symmetric and lower_bound < 0 and upper_bound > 0:
2048 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2049 abs_bound = max(abs(lower_bound), abs(upper_bound))
2050 lower_bound = -abs_bound
2051 upper_bound = abs_bound
2052
2053 return lower_bound, upper_bound
2054
2055 @classmethod
2056 def set_additional_stats_tf1(cls, histogram):
2057 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2058
2059 Parameters
2060 ----------
2061 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2062 Something having a GetListOfFunctions method that should hold
2063 the combined fit and additional statistics function.
2064 """
2065 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2066 cls.set_tf1(histogram, additional_stats_tf1)
2067
2068 @classmethod
2069 def set_fit_tf1(cls, histogram, fit_tf1):
2070 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2071
2072 Parameters
2073 ----------
2074 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2075 Something having a GetListOfFunctions method that should hold
2076 the combined fit and additional statistics function.
2077 """
2078 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2079 combined_tf1 = cls.combine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2080 cls.set_tf1(histogram, combined_tf1)
2081
2082 @classmethod
2083 def set_tf1(cls, histogram, tf1):
2084 """Set the attached TF1 of the histogram.
2085
2086 Parameters
2087 ----------
2088 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2089 Something having a GetListOfFunctions method that should hold
2090 the combined fit and additional statistics function.
2091 """
2092 # Delete any functions formally added
2093 cls.delete_tf1(histogram)
2094 if tf1 is not None:
2095 tf1.SetName("FitAndStats")
2096 histogram.GetListOfFunctions().Add(tf1)
2097
2098 @classmethod
2099 def delete_tf1(cls, histogram):
2100 """Delete the attached TF1 from the histogram
2101
2102 Parameters
2103 ----------
2104 histogram : ROOT.TH1 or ROOT.TGraph
2105 Something having a GetListOfFunctions method that holds the fit function
2106 """
2107 tf1 = histogram.FindObject("FitAndStats")
2108 if tf1:
2109 function_list = histogram.GetListOfFunctions()
2110 function_list.Remove(tf1)
2111
2112 @classmethod
2113 def create_additional_stats_tf1(cls, histogram):
2114 """Create a TF1 with the additional statistics from the histogram as parameters.
2115
2116 Parameters
2117 ----------
2118 histogram : ROOT.TH1 or ROOT.TGraph
2119 Something having a GetListOfFunctions method that holds the additional statistics.
2120
2121 Returns
2122 -------
2123 ROOT.TF1
2124 Function with the additional statistics as parameters.
2125 """
2126
2127 additional_stats = cls.get_additional_stats(histogram)
2128 if not additional_stats:
2129 return None
2130
2131 # Create dummy function, which displays additional statistics in the legend, when added to a histogram.
2132 # Dummy range to serve the functions
2133 lower_bound = 0
2134 upper_bound = 0
2135
2136 # Create a formula which is zero in all cases but has space for n parameters
2137 # Formula string looks like 0*[0]+0*[1]+0*[2]+...
2138 formula_string = '+'.join('0*[' + str(i) + ']' for i in range(len(additional_stats)))
2139
2140 # Compose a function that carries the additional information
2141 additional_stats_tf1 = ROOT.TF1("Stats", formula_string, lower_bound, upper_bound)
2142
2143 for (i, (label, value)) in enumerate(additional_stats.items()):
2144 # root 6 does not like labels with spaces ..
2145 # bug report: https://sft.its.cern.ch/jira/browse/ROOT-7460
2146 # therefore this workaround:
2147 label = label.replace(" ", "-")
2148 additional_stats_tf1.SetParName(i, label)
2149 additional_stats_tf1.FixParameter(i, value)
2150
2151 return additional_stats_tf1
2152
2153 @classmethod
2154 def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2155 """Combine the fit function and the function carrying the additional statistics to one function.
2156
2157 Parameters
2158 ----------
2159 fit_tf1 : ROOT.TF1
2160 The fit function
2161 additional_stats_tf1 : ROOT.TF1
2162 The function carrying the additional statistics as parameters
2163
2164 Returns
2165 -------
2166 ROOT.TF1
2167 """
2168 if additional_stats_tf1 is None:
2169 return fit_tf1
2170
2171 # Combine both TF1 functions
2172 # Get the lower and upper bound of the fit
2173 # Use the pass-by reference containers from pyROOT to be able to call the function
2174 lower_bound = ctypes.c_double()
2175 upper_bound = ctypes.c_double()
2176 fit_tf1.GetRange(lower_bound, upper_bound)
2177 title = fit_tf1.GetTitle()
2178
2179 combined_formula = additional_stats_tf1.GetExpFormula().Data() + '+' + fit_tf1.GetExpFormula().Data()
2180 combined_tf1 = ROOT.TF1("Combined", combined_formula, lower_bound.value, upper_bound.value)
2181 combined_tf1.SetTitle(title)
2182
2183 # Transfer the fitted parameters
2184 chi2 = fit_tf1.GetChisquare()
2185 combined_tf1.SetChisquare(chi2)
2186
2187 ndf = fit_tf1.GetNDF()
2188 combined_tf1.SetNDF(ndf)
2189
2190 n_stats_parameters = additional_stats_tf1.GetNpar()
2191 # n_fit_parameters = fit_tf1.GetNpar()
2192 cls.copy_tf1_parameters(additional_stats_tf1, combined_tf1)
2193 cls.copy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2194
2195 return combined_tf1
2196
2197 @classmethod
2198 def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2199 """Copy the parameters of one TF1 to another.
2200
2201 Parameters
2202 ----------
2203 tf1_source : ROOT.TF1
2204 Take parameters from here
2205 tf1_target : ROOT.TF1
2206 Copy them to here.
2207 offset : int, optional
2208 Index of the first target parameter to which to copy.
2209 """
2210 n_parameters = tf1_source.GetNpar()
2211
2212 # Helper variables for pyROOT's mechanism to call functions by reference
2213 lower_bound = ctypes.c_double()
2214 upper_bound = ctypes.c_double()
2215
2216 for i_source in range(n_parameters):
2217 parameter_name = tf1_source.GetParName(i_source)
2218 i_target = tf1_target.GetParNumber(parameter_name)
2219
2220 # Workaround for a ROOT bug
2221 if i_target == -1:
2222 for i_target in range(tf1_target.GetNpar()):
2223 if parameter_name == tf1_target.GetParName(i_target):
2224 break
2225 else:
2226 i_target = -1
2227 continue
2228
2229 tf1_target.SetParameter(i_target,
2230 tf1_source.GetParameter(i_source))
2231 tf1_target.SetParError(i_target,
2232 tf1_source.GetParError(i_source))
2233
2234 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2235 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2236
2238 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2239 # Forward the attributes to the plot by auto assignment
2240
2242
2244
2246
2247
2249
2251
2253
2254 def set_maximum(self, maximum):
2255 """Sets the maximum of the vertical plotable range"""
2256 for histogram in self.histograms:
2257 if isinstance(histogram, ROOT.TH1):
2258 histogram.SetMaximum(histogram.GetMaximum(maximum))
2259 else:
2260 histogram.SetMaximum(maximum)
2261
2262 def set_minimum(self, minimum):
2263 """Sets the minimum of the vertical plotable range"""
2264 for histogram in self.histograms:
2265 if isinstance(histogram, ROOT.TH1):
2266 histogram.SetMinimum(histogram.GetMinimum(minimum))
2267 else:
2268 histogram.SetMinimum(minimum)
2269
2270 @classmethod
2271 def set_tstyle(cls):
2272 """Set the style such that the additional stats entries are shown by the TBrowser"""
2273 belle2_validation_style_name = "belle2_validation_style"
2274 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2275 if not belle2_validation_tstyle:
2276 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2277
2278 opt_fit = 112
2279 belle2_validation_tstyle.SetOptFit(opt_fit)
2280
2281 opt_stat = 111111
2282 belle2_validation_tstyle.SetOptStat(opt_stat)
2283 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2284
2285 # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2286
2287 else:
2288 belle2_validation_tstyle.cd()
2289
2290
2291def test():
2292 """Simple test method"""
2293 ValidationPlot.set_tstyle()
2294
2295 # Test a histogram plot with some nan and inf values
2296 normal_distributed_values = np.random.randn(1000)
2297
2298 for i in range(10):
2299 normal_distributed_values[i] = np.nan
2300
2301 for i in range(10, 20):
2302 normal_distributed_values[i] = np.inf
2303
2304 for i in range(20, 30):
2305 normal_distributed_values[i] = -np.inf
2306
2307 validation_histogram = ValidationPlot('test_hist')
2308 validation_histogram.hist(normal_distributed_values)
2309 validation_histogram.title = 'A normal distribution'
2310 validation_histogram.xlabel = 'normal'
2311 validation_histogram.ylabel = 'frequency'
2312 validation_histogram.fit_gaus()
2313
2314 # Test for a cumulated histogram - cumulation from left to right
2315 cumulated_histogram = ValidationPlot('test_cum_hist')
2316 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2317 cumulated_histogram.title = 'A cumulated normal distribution'
2318 cumulated_histogram.xlabel = 'normal'
2319 cumulated_histogram.ylabel = 'cdf'
2320 cumulated_histogram.show()
2321
2322 # Test stacked histogram
2323 # Make a random selection of 40%
2324 stackby = np.random.binomial(1.0, 0.40, 1000)
2325 stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2326 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2327
2328 # Make a scatterplot with two species.
2329 x = np.random.randn(1000)
2330 y = 3 * np.random.randn(1000)
2331 ones = np.ones_like(x)
2332 angle = np.pi / 2
2333
2334 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2335 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2336
2337 stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2338 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2339
2340 # Make a profile plot with the two species
2341 stacked_validation_profile = ValidationPlot('test_stacked_profile')
2342 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2343
2344 # Make a two dimensional histogram with two species
2345 stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2346 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2347
2348 # Test a profile with a diagonal fit
2349 x = np.linspace(-1, 1, 1000)
2350 y = x.copy()
2351 x[0] = np.nan
2352 diagonal_plot = ValidationPlot('test_diag')
2353 diagonal_plot.profile(x, y, bins=50)
2354 diagonal_plot.fit_line()
2355
2356 # Test if cumulation works for profile plots
2357 cumulated_profile = ValidationPlot('test_cum_profile')
2358 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2359
2360 tfile = ROOT.TFile('test.root', 'RECREATE')
2361
2362 validation_histogram.write(tfile)
2363
2364 with root_cd("expert") as tdirectory1:
2365 diagonal_plot.write(tdirectory1)
2366 cumulated_profile.write(tdirectory1)
2367 cumulated_histogram.write(tdirectory1)
2368
2369 with root_cd("stacked") as tdirectory2:
2370 stacked_validation_histogram.write(tdirectory2)
2371 stacked_validation_scatter.write()
2372 stacked_validation_profile.write()
2373 stacked_validation_hist2d.write()
2374
2375 tfile.Close()
2376
2377 tfile = ROOT.TFile('test.root')
2378 tBrowser = ROOT.TBrowser()
2379 tBrowser.BrowseObject(tfile)
2380 input()
2381 tfile.Close()
2382
2383
2384if __name__ == '__main__':
2385 test()
title
cached value of the title for this plot
Definition: plot.py:2252
pvalue_warn
custom levels for pvalue warnings
Definition: plot.py:232
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
Definition: plot.py:251
y_log
Indicator whether the y axes should be displayed as a log scale.
Definition: plot.py:238
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
Definition: plot.py:1090
def create_additional_stats_tf1(cls, histogram)
Definition: plot.py:2113
description
description of the plot
Definition: plot.py:2245
contact
contact information for this plot
Definition: plot.py:2243
upper_bound
upper right corner of the hisogram
Definition: plot.py:543
def write(self, tdirectory=None)
Definition: plot.py:756
def fit_gaus(self, z_score=None)
Definition: plot.py:617
ylabel
default label for the histogram's Y axis
Definition: plot.py:279
int very_sparse_dots_line_style_index
A an index that reference to a dot spacing such that the line is almost invisible for scatter.
Definition: plot.py:181
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True, is_asymmetry=False)
Definition: plot.py:299
def is_discrete(xs, max_n_unique=None)
Definition: plot.py:930
_contact
Contact email address for display on the validation page.
Definition: plot.py:211
pvalue_error
custom levels for pvalue errors
Definition: plot.py:235
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
Definition: plot.py:437
def delete_tf1(cls, histogram)
Definition: plot.py:2099
_title
Title of the validation plot.
Definition: plot.py:220
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
Definition: plot.py:993
def description(self, description)
Definition: plot.py:864
def __init__(self, name, referenceFileName=None)
Definition: plot.py:184
def add_stats_entry(cls, histogram, label, value)
Definition: plot.py:1467
_xlabel
X axes label of the validation plot.
Definition: plot.py:214
xlabel
cached value of the x-axis label for this plot
Definition: plot.py:2248
def set_tf1(cls, histogram, tf1)
Definition: plot.py:2083
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1250
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
Definition: plot.py:2154
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
Definition: plot.py:1054
def set_maximum(self, maximum)
Definition: plot.py:2254
def add_nan_inf_stats(cls, histogram, name, xs)
Definition: plot.py:1441
def set_color(self, tobject, root_i_color)
Definition: plot.py:1231
def cumulate(cls, histogram, cumulation_direction=None)
Definition: plot.py:1573
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
Definition: plot.py:2198
_is_expert
per default all plots are expert and must be set to non-expert explicitly
Definition: plot.py:229
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
Definition: plot.py:684
_description
Description of the plot purpose for display on the validation page.
Definition: plot.py:205
histograms
A list of the histograms that make up the plot.
Definition: plot.py:226
def get_additional_stats(cls, histogram)
Definition: plot.py:1484
_check
Detailed check instructions for display on the validation page.
Definition: plot.py:208
name
A unique name to be used as the name of the ROOT object to be generated.
Definition: plot.py:199
def set_fit_tf1(cls, histogram, fit_tf1)
Definition: plot.py:2069
def set_minimum(self, minimum)
Definition: plot.py:2262
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
Definition: plot.py:1199
plot
The main plot object, may contain one or more (in case of stacked pltos) histograms.
Definition: plot.py:223
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1936
def set_additional_stats_tf1(cls, histogram)
Definition: plot.py:2056
_ylabel
Y axes label of the validation plot.
Definition: plot.py:217
referenceFileName
name of the reference file, if not None the binning will be read from there
Definition: plot.py:202
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
Definition: plot.py:1264
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
Definition: plot.py:501
lower_bound
lower left corner of the histogram
Definition: plot.py:541
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
Definition: plot.py:1838
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
Definition: plot.py:1679
def gaus_slice_fit(cls, th2, name, z_score=None)
Definition: plot.py:1507
check
cached value of the user-check action for this plot
Definition: plot.py:2241
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
Definition: plot.py:1358
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
Definition: plot.py:1122
def fill_into_tgraph(self, graph, xs, ys, filter=None)
Definition: plot.py:1275