31 """Get for the logging.Logger instance of this module
36 Logger instance of this module
38 return logging.getLogger(__name__)
42units_by_quantity_name = {
67def get_unit(quantity_name):
68 """Infers the unit of a quantity from its name.
70 Assumes the standard Belle II unit system.
72 Currently looks up the quantity string from units_by_quantity_name.
77 Name of a quantity (E.g. pt, x, ...)
84 unit = units_by_quantity_name.get(quantity_name,
None)
88def compose_axis_label(quantity_name, unit=None):
89 """Formats a quantity name and a unit to a label for a plot axes.
91 If the unit is not given to is tried to infer it
92 from the quantity name by the get_unit function.
97 Name of the quantity to be displayed at the axes
99 The unit of the quantity. Defaults to get_unit(quantity_name)
107 unit = get_unit(quantity_name)
110 axis_label = quantity_name
112 axis_label = f
'{quantity_name} ({unit})'
117def get1DBinningFromReference(name, refFileName):
118 """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
120 @param name : name of the TH1 object to be looked for in the file
121 @param refFileName : name of the reference file where the object is searched for
123 @return int nbin, float xmin, float xmax of the TH1
130 if refFileName
is None or refFileName ==
"":
131 return nbins, x_min, x_max
134 oldDirectory = ROOT.gROOT.CurrentDirectory().load()
136 tfile = ROOT.TFile(refFileName)
138 objptr = tfile.Get(name)
139 if objptr
and objptr.InheritsFrom(
"TH1"):
140 nbins = objptr.GetNbinsX()
141 x_min = objptr.GetXaxis().GetXmin()
142 x_max = objptr.GetXaxis().GetXmax()
144 basf2.B2WARNING(
'Requested object with name: ' + name +
' not found in file: ' + refFileName +
" (or not a TH1)")
146 basf2.B2WARNING(
'Requested file: ' + refFileName +
' could not be opened')
153 return nbins, x_min, x_max
157StatsEntry = ROOT.TParameter(float)
162 """Class for generating a validation plot for the Belle II validation page.
164 Typically it generates plots from values stored in numpy arrays and feeds them into
165 plot ROOT classes for storing them.
167 It implements an automatic binning procedure based on the rice rule and
168 robust z score outlier detection.
170 It also keeps track of additional statistics typically neglected by ROOT such as a count
171 for the non finit values such as NaN, +Inf, -Inf.
173 The special attributes for the Belle II validation page like
178 are exposed as properties of this class.
182 very_sparse_dots_line_style_index = 28
185 def __init__(self, name, referenceFileName=None):
186 """Constructor of the ValidationPlot
191 A unique name to be used as the name of the ROOT object to be generated
193 referenceFileName : str
194 name of a reference file. If set the code will try to get the histogram or profile
195 from that file and determine the number of bins and upper and lower bound
196 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
200 self.name = root_save_name(name)
203 self.referenceFileName = referenceFileName
206 self._description =
''
230 self._is_expert =
True
233 self.pvalue_warn =
None
236 self.pvalue_error =
None
248 outlier_z_score=None,
249 include_exceptionals=True,
250 allow_discrete=False,
251 cumulation_direction=None,
253 """Fill the plot with a one dimensional histogram."""
256 if self.referenceFileName
is not None:
257 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
258 if n
is not None and xmin
is not None and xmax
is not None:
263 th1_factory = ROOT.TH1D
264 self._is_expert = is_expert
266 self.create_1d(th1_factory,
271 lower_bound=lower_bound,
272 upper_bound=upper_bound,
273 outlier_z_score=outlier_z_score,
274 include_exceptionals=include_exceptionals,
275 allow_discrete=allow_discrete,
276 cumulation_direction=cumulation_direction)
280 self.ylabel =
'count'
294 outlier_z_score=None,
295 include_exceptionals=True,
296 allow_discrete=False,
297 cumulation_direction=None,
301 """Fill the plot with a one dimensional profile of one variable over another."""
304 if self.referenceFileName
is not None:
308 n, xmin, xmax = get1DBinningFromReference(self.name, self.referenceFileName)
309 if n
is not None and xmin
is not None and xmax
is not None:
314 th1_factory = ROOT.TProfile
315 self._is_expert = is_expert
316 if gaus_z_score
is None:
317 self.create_1d(th1_factory,
323 lower_bound=lower_bound,
324 upper_bound=upper_bound,
325 outlier_z_score=outlier_z_score,
326 include_exceptionals=include_exceptionals,
327 allow_discrete=allow_discrete,
328 cumulation_direction=cumulation_direction)
331 self.name =
"_" + self.name
332 self.hist2d(xs, ys=ys, weights=weights, stackby=stackby,
334 lower_bound=(lower_bound,
None),
335 upper_bound=(upper_bound,
None),
336 outlier_z_score=(outlier_z_score, outlier_z_score),
337 include_exceptionals=(include_exceptionals,
True),
338 allow_discrete=(allow_discrete,
False),
341 self.name = self.name[1:]
343 for histogram
in self.histograms:
344 profile = self.gaus_slice_fit(histogram,
345 name=histogram.GetName()[1:],
346 z_score=gaus_z_score)
347 profiles.append(profile)
348 self.histograms = profiles
349 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=
False)
354 if y_binary
or self.is_binary(ys):
356 self.ylabel =
'probability'
361 for histogram
in self.histograms:
362 histogram.SetMinimum(min_y)
363 histogram.SetMaximum(1.05)
365 self.plot.SetMinimum(min_y)
366 self.plot.SetMaximum(1.05)
374 lower_bound=(
None,
None),
375 upper_bound=(
None,
None),
376 outlier_z_score=(
None,
None),
377 include_exceptionals=(
True,
True),
380 """Fill the plot with a (unbinned) two dimensional scatter plot"""
382 self._is_expert = is_expert
384 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
385 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
386 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
387 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
389 x_lower_bound, x_upper_bound = self.determine_range(
391 lower_bound=x_lower_bound,
392 upper_bound=x_upper_bound,
393 outlier_z_score=x_outlier_z_score,
394 include_exceptionals=x_include_exceptionals
397 y_lower_bound, y_upper_bound = self.determine_range(
399 lower_bound=y_lower_bound,
400 upper_bound=y_upper_bound,
401 outlier_z_score=y_outlier_z_score,
402 include_exceptionals=y_include_exceptionals
405 graph = ROOT.TGraph()
407 graph.SetName(self.name)
408 graph.SetMarkerStyle(6)
409 graph.GetHistogram().SetOption(
"AP")
414 graph.SetLineColorAlpha(color_index, 0)
415 graph.SetLineStyle(self.very_sparse_dots_line_style_index)
418 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
419 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
429 def grapherrors(self,
433 lower_bound=(
None,
None),
434 upper_bound=(
None,
None),
435 outlier_z_score=(
None,
None),
436 include_exceptionals=(
True,
True),
439 """Fill the plot with a (unbinned) two dimensional scatter plot
440 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
444 self._is_expert = is_expert
449 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
450 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
451 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
452 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
454 x_lower_bound, x_upper_bound = self.determine_range(
456 lower_bound=x_lower_bound,
457 upper_bound=x_upper_bound,
458 outlier_z_score=x_outlier_z_score,
459 include_exceptionals=x_include_exceptionals
462 y_lower_bound, y_upper_bound = self.determine_range(
464 lower_bound=y_lower_bound,
465 upper_bound=y_upper_bound,
466 outlier_z_score=y_outlier_z_score,
467 include_exceptionals=y_include_exceptionals
470 graph = ROOT.TGraphErrors()
472 graph.SetName(self.name)
473 graph.GetHistogram().SetOption(
"A")
475 graph.SetMarkerColor(4)
476 graph.SetMarkerStyle(21)
479 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
480 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
496 lower_bound=(
None,
None),
497 upper_bound=(
None,
None),
498 outlier_z_score=(
None,
None),
499 include_exceptionals=(
True,
True),
500 allow_discrete=(
False,
False),
503 """Fill the plot with a two dimensional histogram"""
507 if quantiles
is not None:
508 name =
"_" + self.name
510 x_bins, y_bins = self.unpack_2d_param(bins)
511 x_lower_bound, y_lower_bound = self.unpack_2d_param(lower_bound)
512 x_upper_bound, y_upper_bound = self.unpack_2d_param(upper_bound)
513 x_outlier_z_score, y_outlier_z_score = self.unpack_2d_param(outlier_z_score)
514 x_include_exceptionals, y_include_exceptionals = self.unpack_2d_param(include_exceptionals)
515 x_allow_discrete, y_allow_discrete = self.unpack_2d_param(allow_discrete)
517 if quantiles
is not None:
518 y_include_exceptionals =
True
519 y_allow_discrete =
False
521 x_bin_edges, x_bin_labels = self.determine_bin_edges(xs,
524 lower_bound=x_lower_bound,
525 upper_bound=x_upper_bound,
526 outlier_z_score=x_outlier_z_score,
527 include_exceptionals=x_include_exceptionals,
528 allow_discrete=x_allow_discrete)
530 y_bin_edges, y_bin_labels = self.determine_bin_edges(ys,
533 lower_bound=y_lower_bound,
534 upper_bound=y_upper_bound,
535 outlier_z_score=y_outlier_z_score,
536 include_exceptionals=y_include_exceptionals,
537 allow_discrete=y_allow_discrete)
539 n_x_bins = len(x_bin_edges) - 1
540 n_y_bins = len(y_bin_edges) - 1
542 self.lower_bound = (x_bin_edges[0], y_bin_edges[0])
544 self.upper_bound = (x_bin_edges[-1], y_bin_edges[-1])
546 histogram = ROOT.TH2D(name,
554 get_logger().info(
"Scatter plot %s is discrete in x.", name)
555 x_taxis = histogram.GetXaxis()
556 for i_x_bin, x_bin_label
in enumerate(x_bin_labels):
557 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
558 self.add_stats_entry(histogram,
"dx", 1)
561 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
562 self.add_stats_entry(histogram,
"dx", x_bin_width)
565 get_logger().info(
"Scatter plot %s is discrete in y.", name)
566 y_taxis = histogram.GetYaxis()
567 for i_y_bin, y_bin_label
in enumerate(y_bin_labels):
568 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
569 self.add_stats_entry(histogram,
"dy", 1)
572 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
573 self.add_stats_entry(histogram,
"dy", y_bin_width)
575 self.create(histogram, xs, ys=ys, weights=weights, stackby=stackby)
577 if quantiles
is not None:
578 self.name = self.name[1:]
580 for histogram
in self.histograms:
581 for quantile
in quantiles:
582 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] +
'_' + str(quantile))
585 x_taxis = histogram.GetXaxis()
586 new_x_taxis = profile.GetXaxis()
587 for i_bin
in range(x_taxis.GetNbins() + 2):
588 label = x_taxis.GetBinLabel(i_bin)
590 new_x_taxis.SetBinLabel(i_bin, label)
593 epsilon = sys.float_info.epsilon
594 for i_bin
in range(0, profile.GetNbinsX() + 2):
595 profile.SetBinError(i_bin, epsilon)
597 profiles.append(profile)
599 self.histograms = profiles
600 self.plot = self.create_stack(profiles, name=self.plot.GetName()[1:], reverse_stack=
False, force_graph=
True)
604 for histogram
in self.histograms:
605 x_taxis = histogram.GetXaxis()
606 x_bin_edges = array.array(
"d", list(range(len(x_bin_labels) + 1)))
607 x_taxis.Set(n_x_bins, x_bin_edges)
611 for histogram
in self.histograms:
612 x_taxis = histogram.GetXaxis()
613 y_bin_edges = array.array(
"d", list(range(len(y_bin_labels) + 1)))
614 y_taxis.Set(n_y_bins, y_bin_edges)
618 def fit_gaus(self, z_score=None):
619 """Fit a Gaus bell curve to the central portion of a one dimensional histogram
621 The fit is applied to the central mean +- z_score * std interval of the histogram,
622 such that it is less influence by non gaussian tails further away than the given z score.
624 @param float z_score number of sigmas to include from the mean value of the histogram.
631 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
633 if not isinstance(plot, ROOT.TH1D):
634 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
638 fit_tf1 = ROOT.TF1(
"Fit", formula)
639 fit_tf1.SetTitle(title)
640 fit_tf1.SetParName(0,
"n")
641 fit_tf1.SetParName(1,
"mean")
642 fit_tf1.SetParName(2,
"std")
644 n = histogram.GetSumOfWeights()
645 mean = histogram.GetMean()
646 std = histogram.GetStdDev()
648 fit_tf1.SetParameter(0, n)
649 fit_tf1.SetParameter(1, mean)
650 fit_tf1.SetParameter(2, std)
653 return self.fit(fit_tf1,
658 """Fit a general line to a one dimensional histogram"""
661 fit_tf1 = ROOT.TF1(
"Fit", formula)
662 fit_tf1.SetTitle(title)
663 fit_tf1.SetParName(0,
"slope")
664 fit_tf1.SetParName(1,
"intercept")
665 self.fit(fit_tf1,
'M')
668 """Fit a constant function to a one dimensional histogram"""
671 fit_tf1 = ROOT.TF1(
"Fit", formula)
672 fit_tf1.SetTitle(title)
673 fit_tf1.SetParName(0,
"intercept")
674 self.fit(fit_tf1,
'M')
677 """Fit a diagonal line through the origin to a one dimensional histogram"""
680 fit_tf1 = ROOT.TF1(
"Fit", formula)
681 fit_tf1.SetTitle(title)
682 fit_tf1.SetParName(0,
"slope")
683 self.fit(fit_tf1,
'M')
685 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
686 """Fit a user defined function to a one dimensional histogram
691 Formula string or TH1 to be fitted. See TF1 constructors for that is a valid formula
693 Options string to be used in the fit. See TH1::Fit()
695 Lower bound of the range to be fitted
697 Upper bound of the range to be fitted
701 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
703 if not isinstance(plot, ROOT.TH1D):
704 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
708 xaxis = histogram.GetXaxis()
709 n_bins = xaxis.GetNbins()
710 hist_lower_bound = xaxis.GetBinLowEdge(1)
711 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
713 if z_score
is not None:
714 mean = histogram.GetMean()
715 std = histogram.GetStdDev()
717 if lower_bound
is None:
718 lower_bound = mean - z_score * std
720 if upper_bound
is None:
721 upper_bound = mean + z_score * std
724 if isinstance(formula, ROOT.TF1):
726 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
728 fit_tf1 = ROOT.TF1(
"Fit",
732 get_logger().info(
'Fitting with %s', fit_tf1.GetExpFormula())
735 if lower_bound
is None or lower_bound < hist_lower_bound:
736 lower_bound = hist_lower_bound
737 if upper_bound
is None or upper_bound > hist_upper_bound:
738 upper_bound = hist_upper_bound
742 if 'N' not in options:
745 fit_res = histogram.Fit(fit_tf1, options +
"S",
"", lower_bound, upper_bound)
747 self.set_fit_tf1(histogram, fit_tf1)
755 raise ValueError(
"Can not show a validation plot that has not been filled.")
757 def write(self, tdirectory=None):
758 """Write the plot to file
762 tdirectory : ROOT.TDirectory, optional
763 ROOT directory to which the plot should be written.
764 If omitted write to the current directory
767 raise ValueError(
"Can not write a validation plot that has not been filled.")
769 with root_cd(tdirectory):
770 ValidationPlot.set_tstyle()
771 if self.plot
not in self.histograms:
775 meta_options = [
"nostats"]
779 meta_options.append(
"expert")
781 meta_options.append(
"shifter")
784 if self.pvalue_error
is not None:
785 meta_options.append(f
"pvalue-error={self.pvalue_error}")
786 if self.pvalue_warn
is not None:
787 meta_options.append(f
"pvalue-warn={self.pvalue_warn}")
791 meta_options.append(
"logy")
793 meta_options_str =
",".join(meta_options)
795 for histogram
in self.histograms:
796 histogram.GetListOfFunctions().Add(ROOT.TNamed(
'MetaOptions', meta_options_str))
801 """Getter method if an plot plot is marked as expert plot"""
802 return self._is_expert
806 """Getter for the plot title"""
810 def title(self, title):
811 """Setter for the plot title"""
814 self.plot.SetTitle(title)
815 for histogram
in self.histograms:
816 histogram.SetTitle(title)
820 """Getter for the axis label at the x axis"""
824 def xlabel(self, xlabel):
825 """Setter for the axis label at the x axis"""
826 self._xlabel = xlabel
827 for histogram
in self.histograms:
828 histogram.GetXaxis().SetTitle(xlabel)
832 """Getter for the axis label at the y axis"""
836 def ylabel(self, ylabel):
837 """Setter for the axis label at the y axis"""
838 self._ylabel = ylabel
839 for histogram
in self.histograms:
840 histogram.GetYaxis().SetTitle(ylabel)
844 """Getter for the contact email address to be displayed on the validation page"""
848 def contact(self, contact):
849 """Setter for the contact email address to be displayed on the validation page"""
850 self._contact = contact
851 for histogram
in self.histograms:
852 found_obj = histogram.FindObject(
'Contact')
854 tnamed = ROOT.TNamed(
"Contact", contact)
855 histogram.GetListOfFunctions().Add(tnamed)
856 found_obj = histogram.FindObject(
'Contact')
857 found_obj.SetTitle(contact)
860 def description(self):
861 """Getter for the description to be displayed on the validation page"""
862 return self._description
865 def description(self, description):
866 """Setter for the description to be displayed on the validation page"""
867 self._description = description
868 for histogram
in self.histograms:
869 found_obj = histogram.FindObject(
'Description')
871 tnamed = ROOT.TNamed(
"Description", description)
872 histogram.GetListOfFunctions().Add(tnamed)
873 found_obj = histogram.FindObject(
'Description')
874 found_obj.SetTitle(description)
878 """Getter for the check to be displayed on the validation page"""
882 def check(self, check):
883 """Setter for the check to be displayed on the validation page"""
885 for histogram
in self.histograms:
886 found_obj = histogram.FindObject(
'Check')
888 tnamed = ROOT.TNamed(
"Check", check)
889 histogram.GetListOfFunctions().Add(tnamed)
890 found_obj = histogram.FindObject(
'Check')
891 found_obj.SetTitle(check)
897 def unpack_2d_param(param):
898 """Unpacks a function parameter for the two dimensional plots.
900 If it is a pair the first parameter shall apply to the x coordinate
901 the second to the y coordinate. In this case the pair is returned as two values
903 If something else is given the it is assumed that this parameter should equally apply
904 to both coordinates. In this case the same values is return twice as a pair.
908 param : pair or single value
909 Function parameter for a two dimensional plot
914 A pair of values being the parameter for the x coordinate and
915 the y coordinate respectively
919 x_param, y_param = param
923 return x_param, y_param
927 """Determine if the data consists of boolean values"""
928 return statistics.is_binary_series(xs)
931 def is_discrete(xs, max_n_unique=None):
932 """Determine if the data consists of discrete values"""
933 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
936 def get_exceptional_values(xs):
937 """Find exceptionally frequent values
947 A list of the found exceptional values.
949 return statistics.rice_exceptional_values(xs)
952 def get_robust_mean_and_std(xs):
953 """Does an estimation of mean and standard deviation robust against outliers.
963 Pair of mean and standard deviation
965 x_mean = statistics.truncated_mean(xs)
966 x_std = statistics.trimmed_std(xs)
970 def format_bin_label(value):
971 """Formats a value to be placed at a tick on an axis."""
972 if np.isfinite(value)
and value == np.round(value):
973 return str(int(value))
975 formated_value = f
"{value:.5g}"
978 if len(formated_value) > 8:
979 formated_value = f
"{value:.3e}"
980 return formated_value
991 outlier_z_score=None,
992 include_exceptionals=True,
993 allow_discrete=False,
994 cumulation_direction=None):
995 """Combined factory method for creating a one dimensional histogram or a profile plot."""
999 xs = np.array(xs, copy=
False)
1002 ys = np.array(ys, copy=
False)
1004 if weights
is not None:
1005 weights = np.array(weights, copy=
False)
1007 bin_edges, bin_labels = self.determine_bin_edges(xs,
1010 lower_bound=lower_bound,
1011 upper_bound=upper_bound,
1012 outlier_z_score=outlier_z_score,
1013 include_exceptionals=include_exceptionals,
1014 allow_discrete=allow_discrete)
1016 n_bins = len(bin_edges) - 1
1017 self.lower_bound = bin_edges[0]
1018 self.upper_bound = bin_edges[-1]
1019 histogram = th1_factory(name,
'', n_bins, bin_edges)
1022 get_logger().info(
"One dimensional plot %s is discrete in x.", name)
1023 x_taxis = histogram.GetXaxis()
1024 for i_bin, bin_label
in enumerate(bin_labels):
1025 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1026 self.add_stats_entry(histogram,
"dx", 1)
1029 bin_width = bin_edges[1] - bin_edges[0]
1030 self.add_stats_entry(histogram,
"dx", bin_width)
1032 self.create(histogram,
1037 cumulation_direction=cumulation_direction,
1043 for histogram
in self.histograms:
1044 x_taxis = histogram.GetXaxis()
1045 bin_edges = array.array(
"d", list(range(len(bin_labels) + 1)))
1046 x_taxis.Set(n_bins, bin_edges)
1054 cumulation_direction=None,
1055 reverse_stack=None):
1056 """Create histograms from a template, possibly stacked"""
1061 histogram = histogram_template
1062 self.fill_into(histogram, xs, ys, weights=weights)
1063 if cumulation_direction
is not None:
1064 histogram = self.cumulate(histogram, cumulation_direction=cumulation_direction)
1066 histograms.append(histogram)
1070 stackby = np.array(stackby, copy=
False)
1071 name = histogram_template.GetName()
1073 histograms = self.fill_into_grouped(histogram_template,
1078 groupby_label=
"stack")
1080 if cumulation_direction
is not None:
1081 histograms = [self.cumulate(histogram, cumulation_direction=cumulation_direction)
1082 for histogram
in histograms]
1084 plot = self.create_stack(histograms, name=name +
"_stacked", reverse_stack=reverse_stack)
1086 self.histograms = histograms
1088 self.attach_attributes()
1091 def create_stack(cls, histograms, name, reverse_stack, force_graph=False):
1092 """Create a stack of histograms"""
1093 if len(histograms) == 1:
1094 plot = histograms[0]
1096 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph))
or force_graph:
1097 plot = ROOT.TMultiGraph()
1099 plot = ROOT.THStack()
1106 for histogram
in reversed(histograms):
1107 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1108 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1109 plot.Add(histogram,
"APZ")
1113 for histogram
in histograms:
1114 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1115 histogram = cls.convert_tprofile_to_tgrapherrors(histogram)
1116 plot.Add(histogram,
"APZ")
1123 def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False):
1124 """Extract errors from a TProfile histogram and create a TGraph from these"""
1125 if isinstance(tprofile, ROOT.TGraph):
1128 x_taxis = tprofile.GetXaxis()
1129 n_bins = x_taxis.GetNbins()
1132 bin_ids_without_underflow = list(range(1, n_bins + 1))
1134 bin_centers = np.array([x_taxis.GetBinCenter(i_bin)
for i_bin
in bin_ids_without_underflow])
1136 bin_centers = np.abs(bin_centers)
1137 bin_widths = np.array([x_taxis.GetBinWidth(i_bin)
for i_bin
in bin_ids_without_underflow])
1138 bin_x_errors = bin_widths / 2.0
1141 bin_contents = np.array([tprofile.GetBinContent(i_bin)
for i_bin
in bin_ids_without_underflow])
1142 bin_y_errors = np.array([tprofile.GetBinError(i_bin)
for i_bin
in bin_ids_without_underflow])
1144 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1146 tgrapherrors.GetHistogram().SetOption(
"APZ")
1148 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1149 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1152 for tobject
in tprofile.GetListOfFunctions():
1153 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1156 cls.add_stats_entry(tgrapherrors,
'count', tprofile.GetEntries())
1158 stats_values = np.array([np.nan] * 6)
1159 tprofile.GetStats(stats_values)
1161 sum_w = stats_values[0]
1163 sum_wx = stats_values[2]
1164 sum_wx2 = stats_values[3]
1165 sum_wy = stats_values[4]
1166 sum_wy2 = stats_values[5]
1168 cls.add_stats_entry(tgrapherrors,
1172 cls.add_stats_entry(tgrapherrors,
1174 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1176 cls.add_stats_entry(tgrapherrors,
1180 cls.add_stats_entry(tgrapherrors,
1182 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1184 cls.add_stats_entry(tgrapherrors,
1186 tgrapherrors.GetCovariance())
1188 cls.add_stats_entry(tgrapherrors,
1190 tgrapherrors.GetCorrelationFactor())
1194 def fill_into_grouped(self,
1200 groupby_label="group"):
1201 """Fill data into similar histograms in groups indicated by a groupby array"""
1204 unique_groupbys = np.unique(groupbys)
1205 name = histogram_template.GetName()
1207 for i_value, value
in enumerate(unique_groupbys):
1209 indices_for_value = np.isnan(groupbys)
1211 indices_for_value = groupbys == value
1214 histogram_for_value = histogram_template.Clone(name +
'_' + str(value))
1215 i_root_color = i_value + 1
1217 self.set_color(histogram_for_value, i_root_color)
1220 self.add_stats_entry(histogram_for_value, groupby_label, value)
1222 self.fill_into(histogram_for_value,
1226 filter=indices_for_value)
1228 histograms.append(histogram_for_value)
1232 def set_color(self, tobject, root_i_color):
1233 """Set the color of the ROOT object.
1235 By default the line color of a TGraph should be invisible, so do not change it
1236 For other objects set the marker and the line color
1240 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1241 Object of which the color should be set.
1243 Color index of the ROOT color table
1245 if isinstance(tobject, ROOT.TGraph):
1246 tobject.SetMarkerColor(root_i_color)
1248 tobject.SetLineColor(root_i_color)
1249 tobject.SetMarkerColor(root_i_color)
1251 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1252 """Fill the data into the plot object"""
1253 if isinstance(plot, ROOT.TGraph):
1255 raise ValueError(
"ys are required for filling a graph")
1256 self.fill_into_tgraph(plot, xs, ys, filter=filter)
1257 elif isinstance(plot, ROOT.TGraphErrors):
1259 raise ValueError(
"ys are required for filling a graph error")
1261 self.fill_into_tgrapherror(plot, xs, ys)
1263 self.fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1265 def fill_into_tgrapherror(self, graph, xs, ys, filter=None):
1266 """fill point values and error of the x and y axis into the graph"""
1268 assert (len(xs[0]) == len(ys[0]))
1270 graph.Set(len(xs[0]))
1272 for i
in range(len(xs[0])):
1273 graph.SetPoint(i, xs[0][i], ys[0][i])
1274 graph.SetPointError(i, xs[1][i], ys[1][i])
1276 def fill_into_tgraph(self, graph, xs, ys, filter=None):
1277 """Fill the data into a TGraph"""
1281 filter = slice(
None)
1291 if x_n_data > max_n_data
or y_n_data > max_n_data:
1292 get_logger().warning(f
"Number of points in scatter graph {self.name} exceed limit {max_n_data}")
1294 get_logger().warning(f
"Cropping {max_n_data}")
1296 xs = xs[0:max_n_data]
1297 ys = ys[0:max_n_data]
1299 x_axis = graph.GetXaxis()
1300 y_axis = graph.GetYaxis()
1302 x_lower_bound = x_axis.GetXmin()
1303 x_upper_bound = x_axis.GetXmax()
1305 y_lower_bound = y_axis.GetXmin()
1306 y_upper_bound = y_axis.GetXmax()
1308 x_underflow_indices = xs < x_lower_bound
1309 x_overflow_indices = xs > x_upper_bound
1311 y_underflow_indices = ys < y_lower_bound
1312 y_overflow_indices = ys > y_upper_bound
1314 plot_indices = ~(np.isnan(xs) |
1315 x_underflow_indices |
1316 x_overflow_indices |
1318 y_underflow_indices |
1321 n_plot_data = np.sum(plot_indices)
1322 plot_xs = xs[plot_indices]
1323 plot_ys = ys[plot_indices]
1325 graph.Set(int(n_plot_data))
1326 for i, (x, y)
in enumerate(zip(plot_xs, plot_ys)):
1327 graph.SetPoint(i, x, y)
1329 self.add_stats_entry(graph,
'count', np.sum(np.isfinite(xs)))
1331 self.add_nan_inf_stats(graph,
'x', xs)
1332 self.add_nan_inf_stats(graph,
'y', ys)
1334 x_n_underflow = np.sum(x_underflow_indices)
1336 self.add_stats_entry(graph,
'x underf.', x_n_underflow)
1338 x_n_overflow = np.sum(x_overflow_indices)
1340 self.add_stats_entry(graph,
'x overf.', x_n_overflow)
1342 y_n_underflow = np.sum(y_underflow_indices)
1344 self.add_stats_entry(graph,
'y underf.', y_n_underflow)
1346 y_n_overflow = np.sum(y_overflow_indices)
1348 self.add_stats_entry(graph,
'y overf.', y_n_overflow)
1350 self.add_stats_entry(graph,
'x avg', graph.GetMean(1))
1351 self.add_stats_entry(graph,
'x std', graph.GetRMS(1))
1353 self.add_stats_entry(graph,
'y avg', graph.GetMean(2))
1354 self.add_stats_entry(graph,
'y std', graph.GetRMS(2))
1356 self.add_stats_entry(graph,
'cov', graph.GetCovariance())
1357 self.add_stats_entry(graph,
'corr', graph.GetCorrelationFactor())
1359 def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None):
1360 """Fill the histogram blocking non finite values
1364 histogram : ROOT.TH1
1365 The histogram to be filled
1366 xs : numpy.ndarray (1d)
1367 Data for the first axes
1368 ys : numpy.ndarray (1d), optional
1369 Data for the second axes
1370 weights : numpy.ndarray (1d), optional
1371 Weight of the individual points. Defaults to one for each
1372 filter : numpy.ndarray, optional
1373 Boolean index array indicating which entries shall be taken.
1377 filter = slice(
None)
1381 self.add_nan_inf_stats(histogram,
'x', xs)
1382 finite_filter = np.isfinite(xs)
1387 self.add_nan_inf_stats(histogram,
'y', ys)
1388 finite_filter &= np.isfinite(ys)
1391 xs = xs[finite_filter]
1392 weights = np.ones_like(xs)
1394 weights = weights[filter]
1395 self.add_nan_inf_stats(histogram,
'w', weights)
1396 finite_filter &= np.isfinite(weights)
1397 xs = xs[finite_filter]
1398 weights[finite_filter]
1401 ys = ys[finite_filter]
1406 except AttributeError:
1407 Fill = histogram.Fill
1409 fill = np.frompyfunc(Fill, 2, 1)
1410 fill(xs.astype(np.float64, copy=
False),
1411 weights.astype(np.float64, copy=
False))
1413 fill = np.frompyfunc(Fill, 3, 1)
1414 fill(xs.astype(np.float64, copy=
False),
1415 ys.astype(np.float64, copy=
False),
1416 weights.astype(np.float64, copy=
False))
1420 xs = xs.astype(np.float64, copy=
False)
1421 weights = weights.astype(np.float64, copy=
False)
1424 histogram.FillN(n, xs, weights)
1426 basf2.B2WARNING(
"No values to be filled into histogram: " + self.name)
1430 xs = xs.astype(np.float64, copy=
False)
1431 ys = ys.astype(np.float64, copy=
False)
1432 weights = weights.astype(np.float64, copy=
False)
1435 histogram.FillN(n, xs, ys, weights)
1437 basf2.B2WARNING(
"No values to be filled into histogram: " + self.name)
1439 self.set_additional_stats_tf1(histogram)
1442 def add_nan_inf_stats(cls, histogram, name, xs):
1443 """ Extracts the counts of non finite floats from a series
1444 and adds them as additional statistics to the histogram.
1448 histogram : derived from ROOT.TH1 or ROOT.TGraph
1449 Something having a GetListOfFunctions method that
1451 A label for the data series to be prefixed to the entries.
1452 xs : numpy.ndarray (1d)
1453 Data from which the non finit floats should be counted.
1455 n_nans = np.isnan(xs).sum()
1457 cls.add_stats_entry(histogram, name +
' nan', n_nans)
1459 n_positive_inf = np.sum(xs == np.inf)
1460 if n_positive_inf > 0:
1461 cls.add_stats_entry(histogram, name +
' pos inf', n_positive_inf)
1463 n_negative_inf = np.sum(xs == -np.inf)
1464 if n_negative_inf > 0:
1465 cls.add_stats_entry(histogram, name +
' neg inf', n_negative_inf)
1468 def add_stats_entry(cls, histogram, label, value):
1469 """Add a new additional statistics to the histogram.
1473 histogram : derived from ROOT.TH1 or ROOT.TGraph
1474 Something having a GetListOfFunctions method that holds the additional statistics
1476 Label of the statistic
1478 Value of the statistic
1480 stats_entry = StatsEntry(str(label), float(value))
1481 histogram.GetListOfFunctions().Add(stats_entry)
1482 cls.set_additional_stats_tf1(histogram)
1485 def get_additional_stats(cls, histogram):
1486 """Get the additional statistics from the histogram and return them a dict.
1490 histogram : derived from ROOT.TH1 or ROOT.TGraph
1491 Something having a GetListOfFunctions method that holds the additional statistics
1495 collection.OrderedDict
1496 A map of labels to values for the additional statistics
1498 additional_stats = collections.OrderedDict()
1499 for tobject
in histogram.GetListOfFunctions():
1500 if isinstance(tobject, StatsEntry):
1501 stats_entry = tobject
1502 label = stats_entry.GetName()
1503 value = stats_entry.GetVal()
1504 additional_stats[label] = value
1505 return additional_stats
1508 def gaus_slice_fit(cls, th2, name, z_score=None):
1509 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1512 y_taxis = th2.GetYaxis()
1513 th2_lower_bound = y_taxis.GetXmin()
1514 th2_upper_bound = y_taxis.GetXmax()
1515 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1516 n_y_bins = y_taxis.GetNbins()
1518 y_mean = th2.GetMean(2)
1519 y_std = th2.GetStdDev(2)
1520 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1521 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1522 fit_height = fit_upper_bound - fit_lower_bound
1524 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1526 fit_lower_bound = th2_lower_bound
1527 fit_upper_bound = th2_upper_bound
1528 fit_height = fit_upper_bound - fit_lower_bound
1529 required_n_bins_inslice_filled = n_y_bins / 1.61
1532 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1534 fit_tf1 = ROOT.TF1(
"Fit",
"gaus", fit_lower_bound, fit_upper_bound)
1535 fit_tf1.SetParName(0,
"n")
1536 fit_tf1.SetParName(1,
"mean")
1537 fit_tf1.SetParName(2,
"std")
1541 param_fit_th1s = ROOT.TObjArray()
1542 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1543 int(required_n_bins_inslice_filled),
1544 fit_options, param_fit_th1s)
1546 th1_means = param_fit_th1s.At(1)
1547 th1_means.SetName(name)
1548 th1_means.SetTitle(th2.GetTitle())
1549 for label, value
in cls.get_additional_stats(th2).items():
1550 cls.add_stats_entry(th1_means, label, value)
1553 x_taxis = th2.GetXaxis()
1554 new_x_taxis = th1_means.GetXaxis()
1555 for i_bin
in range(x_taxis.GetNbins() + 2):
1556 label = x_taxis.GetBinLabel(i_bin)
1558 new_x_taxis.SetBinLabel(i_bin, label)
1561 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1562 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1563 data_height = data_upper_bound - data_lower_bound
1565 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1566 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1568 th1_means.SetMinimum(plot_lower_bound)
1569 th1_means.SetMaximum(plot_upper_bound)
1574 def cumulate(cls, histogram, cumulation_direction=None):
1575 """Cumulates the histogram inplace.
1579 histogram : ROOT.TH1 or ROOT.TProfile
1580 Filled histogram to be cumulated
1581 cumulation_direction : int, optional
1582 Direction is indicated by the sign.
1583 Positive means from left to right, negative means from right to left.
1584 If now cumulation direction is given return the histogram as is.
1589 Cumulated histogram potentially altered inplace.
1591 if not cumulation_direction:
1594 cumulate_backward = cumulation_direction < 0
1595 cumulate_forward =
not cumulate_backward
1597 if isinstance(histogram, ROOT.TH2):
1598 raise ValueError(
"Cannot cumulate a two dimensional histogram.")
1600 if isinstance(histogram, ROOT.TH3):
1601 raise ValueError(
"Cannot cumulate a three dimensional histogram.")
1603 if not isinstance(histogram, ROOT.TH1):
1604 raise ValueError(
"Can only cumulate a one dimensional histogram.")
1606 if isinstance(histogram, ROOT.TProfile):
1607 tprofile = histogram
1609 tgraph = cls.convert_tprofile_to_tgrapherrors(histogram)
1610 tgraph.SetName(tprofile.GetName())
1612 n_bins = histogram.GetNbinsX()
1614 cumulated_content = 0.0
1615 cumulated_entries = 0
1619 i_bins = list(range(0, n_bins + 2))
1620 if not cumulate_forward:
1621 i_bins = reversed(i_bins)
1623 for i_bin
in i_bins:
1625 bin_content = tprofile.GetBinContent(i_bin)
1626 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1627 bin_std = tprofile.GetBinError(i_bin)
1629 if bin_entries != 0:
1630 cumulated_content = (
1631 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1632 (cumulated_entries + bin_entries)
1636 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1637 (cumulated_entries + bin_entries)
1640 cumulated_entries = cumulated_entries + bin_entries
1645 if i_point >= 0
and i_point < n_points:
1646 x = tgraph.GetX()[i_point]
1650 tgraph.SetPoint(i_point, x, cumulated_content)
1652 x_error = tgraph.GetErrorX(i_point)
1653 tgraph.SetPointError(i_point, x_error, cumulated_std)
1658 n_bins = histogram.GetNbinsX()
1659 cumulated_content = 0.0
1661 i_bins = list(range(0, n_bins + 2))
1662 if not cumulate_forward:
1663 i_bins = reversed(i_bins)
1665 for i_bin
in i_bins:
1666 bin_content = histogram.GetBinContent(i_bin)
1667 cumulated_content += bin_content
1668 histogram.SetBinContent(i_bin, cumulated_content)
1672 def determine_bin_edges(self,
1678 outlier_z_score=None,
1679 include_exceptionals=True,
1680 allow_discrete=False):
1681 """Deducing bin edges from a data series.
1685 xs : numpy.ndarray (1d)
1686 Data point for which a binning should be found.
1687 stackbys : numpy.ndarray (1d)
1688 Categories of the data points to be distinguishable
1689 bins : list(float) or int or None, optional
1690 Preset bin edges or preset number of desired bins.
1691 The default, None, means the bound should be extracted from data.
1692 The rice rule is used the determine the number of bins.
1693 If a list of floats is given return them immediately.
1694 lower_bound : float or None, optional
1695 Preset lower bound of the binning range.
1696 The default, None, means the bound should be extracted from data.
1697 upper_bound : float or None, optional
1698 Preset upper bound of the binning range.
1699 The default, None, means the bound should be extracted from data.
1700 outlier_z_score : float or None, optional
1701 Threshold z-score of outlier detection.
1702 The default, None, means no outlier detection.
1703 include_exceptionals : bool, optional
1704 If the outlier detection is active this switch indicates,
1705 if values detected as exceptionally frequent shall be included
1706 nevertheless into the binning range. Default is True,
1707 which means exceptionally frequent values as included
1708 even if they are detected as outliers.
1712 np.array (1d), list(str)
1713 Pair of bin edges and labels deduced from the series.
1714 Second element is None if the series is not detected as discrete.
1716 debug = get_logger().debug
1717 debug(
'Determine binning for plot named %s', self.name)
1723 elif isinstance(bins, collections.abc.Iterable):
1727 bin_edges = array.array(
'd', bin_edges)
1729 return bin_edges, bin_labels
1740 message = f
'Cannot accept n_bins={bins} as number of bins, because it is not a number greater than 0.'
1741 raise ValueError(message)
1744 xs = np.array(xs, copy=
False)
1746 if self.is_binary(xs)
or (allow_discrete
and self.is_discrete(xs)):
1748 debug(
'Discrete binning values encountered')
1749 finite_xs = xs[np.isfinite(xs)]
1750 unique_xs = np.unique(finite_xs)
1753 if lower_bound
is None:
1754 if len(unique_xs) == 0:
1755 if upper_bound
is None:
1758 lower_bound = upper_bound - 1
1760 lower_bound = np.min(unique_xs)
1762 unique_xs = unique_xs[unique_xs >= lower_bound]
1764 if upper_bound
is None:
1765 if len(unique_xs) == 0:
1766 upper_bound = lower_bound + 1
1768 upper_bound = np.min(unique_xs)
1770 unique_xs = unique_xs[unique_xs <= upper_bound]
1773 n_bins = len(unique_xs)
or 1
1775 if len(unique_xs) > 0
and n_bins >= len(unique_xs):
1777 bin_edges = array.array(
'd', unique_xs)
1778 format_bin_label = self.format_bin_label
1779 bin_labels = [format_bin_label(value)
for value
in bin_edges]
1780 bin_edges.append(bin_edges[-1] + 1)
1781 return bin_edges, bin_labels
1790 debug(
'Lower bound %s', lower_bound)
1791 debug(
'Upper bound %s', upper_bound)
1792 debug(
'N bins %s', n_bins)
1795 bin_range = self.determine_bin_range(xs,
1798 lower_bound=lower_bound,
1799 upper_bound=upper_bound,
1800 outlier_z_score=outlier_z_score,
1801 include_exceptionals=include_exceptionals)
1803 n_bins, lower_bound, upper_bound = bin_range
1805 n_bin_edges = n_bins + 1
1806 if lower_bound != upper_bound:
1808 debug(
"Creating flat distribution binning")
1809 percentiles = np.linspace(0.0, 100.0, n_bin_edges)
1810 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], percentiles))
1814 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1819 bin_edges[0] = lower_bound
1820 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1821 debug(
'Bins %s', bin_edges)
1825 bin_edges = [lower_bound, upper_bound + 1]
1828 bin_edges = array.array(
'd', bin_edges)
1829 debug(
'Bins %s for %s', bin_edges, self.name)
1830 return bin_edges,
None
1832 def determine_bin_range(self,
1838 outlier_z_score=None,
1839 include_exceptionals=True):
1840 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1841 estimating the values that are not given.
1843 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1844 The z-score is calculated (x - x_mean) / x_std. The be robust against outliers the necessary
1845 mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1846 quantile range (IQR).
1848 If additional include_exceptionals is true the method tries to find exceptional values in the series
1849 and always include them in the range if it finds any.
1850 Exceptional values means exact values that appear often in the series for whatever reason.
1851 Possible reasons include
1852 * Integral / default values
1853 * Failed evaluation conditions
1855 which should be not cropped away automatically if you are locking on the quality of your data.
1859 xs : numpy.ndarray (1d)
1860 Data point for which a binning should be found.
1861 stackbys : numpy.ndarray (1d)
1862 Categories of the data points to be distinguishable
1863 n_bins : int or None, optional
1864 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1865 The rice rule is used the determine the number of bins.
1866 lower_bound : float or None, optional
1867 Preset lower bound of the binning range.
1868 The default, None, means the bound should be extracted from data.
1869 upper_bound : float or None, optional
1870 Preset upper bound of the binning range.
1871 The default, None, means the bound should be extracted from data.
1872 outlier_z_score : float or None, optional
1873 Threshold z-score of outlier detection.
1874 The default, None, means no outlier detection.
1875 include_exceptionals : bool, optional
1876 If the outlier detection is active this switch indicates,
1877 if values detected as exceptionally frequent shall be included
1878 nevertheless into the binning range. Default is True,
1879 which means exceptionally frequent values as included
1880 even if they are detected as outliers.
1884 n_bins, lower_bound, upper_bound : int, float, float
1885 A triple of found number of bins, lower bound and upper bound of the binning range.
1888 if stackbys
is not None:
1889 unique_stackbys = np.unique(stackbys)
1891 for value
in unique_stackbys:
1893 indices_for_value = np.isnan(stackbys)
1895 indices_for_value = stackbys == value
1897 stack_lower_bound, stack_upper_bound = \
1898 self.determine_range(xs[indices_for_value],
1899 lower_bound=lower_bound,
1900 upper_bound=upper_bound,
1901 outlier_z_score=outlier_z_score,
1902 include_exceptionals=include_exceptionals)
1904 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1906 lower_bound = np.nanmin([lwb
for lwb, upb
in stack_ranges])
1907 upper_bound = np.nanmax([upb
for lwb, upb
in stack_ranges])
1910 lower_bound, upper_bound = self.determine_range(xs,
1911 lower_bound=lower_bound,
1912 upper_bound=upper_bound,
1913 outlier_z_score=outlier_z_score,
1914 include_exceptionals=include_exceptionals)
1919 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1920 rice_n_bins = int(statistics.rice_n_bin(n_data))
1921 n_bins = rice_n_bins
1924 n_bins = int(n_bins)
1927 message = f
'Cannot accept n_bins={n_bins} as number of bins, because it is not a number greater than 0.'
1928 raise ValueError(message)
1930 return n_bins, lower_bound, upper_bound
1932 def determine_range(self,
1936 outlier_z_score=None,
1937 include_exceptionals=True):
1941 xs : numpy.ndarray (1d)
1942 Data point for which a binning should be found.
1943 lower_bound : float or None, optional
1944 Preset lower bound of the binning range.
1945 The default, None, means the bound should be extracted from data.
1946 upper_bound : float or None, optional
1947 Preset upper bound of the binning range.
1948 The default, None, means the bound should be extracted from data.
1949 outlier_z_score : float or None, optional
1950 Threshold z-score of outlier detection.
1951 The default, None, means no outlier detection.
1952 include_exceptionals : bool, optional
1953 If the outlier detection is active this switch indicates,
1954 if values detected as exceptionally frequent shall be included
1955 nevertheless into the binning range. Default is True,
1956 which means exceptionally frequent values as included
1957 even if they are detected as outliers.
1961 lower_bound, upper_bound : float, float
1962 A pair of found lower bound and upper bound of series.
1964 debug = get_logger().debug
1966 finite_xs_indices = np.isfinite(xs)
1967 if np.any(finite_xs_indices):
1968 finite_xs = xs[finite_xs_indices]
1972 make_symmetric =
False
1973 exclude_outliers = outlier_z_score
is not None and (lower_bound
is None or upper_bound
is None)
1976 if include_exceptionals
or exclude_outliers:
1977 exceptional_xs = self.get_exceptional_values(finite_xs)
1978 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1981 if exclude_outliers:
1982 if not np.all(exceptional_indices):
1985 x_mean, x_std = self.get_robust_mean_and_std(finite_xs[~exceptional_indices])
1987 x_mean, x_std = np.nan, np.nan
1989 make_symmetric = abs(x_mean) < x_std / 5.0
and lower_bound
is None and upper_bound
is None
1991 if include_exceptionals
and len(exceptional_xs) != 0:
1992 lower_exceptional_x = np.min(exceptional_xs)
1993 upper_exceptional_x = np.max(exceptional_xs)
1994 make_symmetric =
False
1996 lower_exceptional_x = np.nan
1997 upper_exceptional_x = np.nan
2000 if lower_bound
is None:
2002 lower_bound = np.min(finite_xs)
2006 if outlier_z_score
is not None:
2008 lower_outlier_bound = x_mean - outlier_z_score * x_std
2012 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2014 if np.any(indices_above_lower_outlier_bound):
2015 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2018 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2020 debug(
'Lower bound after outlier detection')
2021 debug(
'Lower bound %s', lower_bound)
2022 debug(
'Lower outlier bound %s', lower_outlier_bound)
2025 if upper_bound
is None:
2027 upper_bound = np.max(finite_xs)
2030 if outlier_z_score
is not None:
2032 upper_outlier_bound = x_mean + outlier_z_score * x_std
2036 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2038 if np.any(indices_below_upper_outlier_bound):
2039 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2042 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2044 debug(
'Upper bound after outlier detection')
2045 debug(
'Upper bound %s', upper_bound)
2046 debug(
'Upper outlier bound %s', upper_outlier_bound)
2048 if make_symmetric
and lower_bound < 0
and upper_bound > 0:
2049 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2050 abs_bound = max(abs(lower_bound), abs(upper_bound))
2051 lower_bound = -abs_bound
2052 upper_bound = abs_bound
2054 return lower_bound, upper_bound
2057 def set_additional_stats_tf1(cls, histogram):
2058 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2062 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2063 Something having a GetListOfFunctions method that should hold
2064 the combined fit and additional statistics function.
2066 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2067 cls.set_tf1(histogram, additional_stats_tf1)
2070 def set_fit_tf1(cls, histogram, fit_tf1):
2071 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2075 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2076 Something having a GetListOfFunctions method that should hold
2077 the combined fit and additional statistics function.
2079 additional_stats_tf1 = cls.create_additional_stats_tf1(histogram)
2080 combined_tf1 = cls.combine_fit_and_additional_stats(fit_tf1, additional_stats_tf1)
2081 cls.set_tf1(histogram, combined_tf1)
2084 def set_tf1(cls, histogram, tf1):
2085 """Set the attached TF1 of the histogram.
2089 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2090 Something having a GetListOfFunctions method that should hold
2091 the combined fit and additional statistics function.
2094 cls.delete_tf1(histogram)
2096 tf1.SetName(
"FitAndStats")
2097 histogram.GetListOfFunctions().Add(tf1)
2100 def delete_tf1(cls, histogram):
2101 """Delete the attached TF1 from the histogram
2105 histogram : ROOT.TH1 or ROOT.TGraph
2106 Something having a GetListOfFunctions method that holds the fit function
2108 tf1 = histogram.FindObject(
"FitAndStats")
2110 function_list = histogram.GetListOfFunctions()
2111 function_list.Remove(tf1)
2114 def create_additional_stats_tf1(cls, histogram):
2115 """Create a TF1 with the additional statistics from the histogram as parameters.
2119 histogram : ROOT.TH1 or ROOT.TGraph
2120 Something having a GetListOfFunctions method that holds the additional statistics.
2125 Function with the additional statistics as parameters.
2128 additional_stats = cls.get_additional_stats(histogram)
2129 if not additional_stats:
2139 formula_string =
'+'.join(
'0*[' + str(i) +
']' for i
in range(len(additional_stats)))
2142 additional_stats_tf1 = ROOT.TF1(
"Stats", formula_string, lower_bound, upper_bound)
2144 for (i, (label, value))
in enumerate(additional_stats.items()):
2148 label = label.replace(
" ",
"-")
2149 additional_stats_tf1.SetParName(i, label)
2150 additional_stats_tf1.FixParameter(i, value)
2152 return additional_stats_tf1
2155 def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1):
2156 """Combine the fit function and the function carrying the additional statistics to one function.
2162 additional_stats_tf1 : ROOT.TF1
2163 The function carrying the additional statistics as parameters
2169 if additional_stats_tf1
is None:
2175 lower_bound = ctypes.c_double()
2176 upper_bound = ctypes.c_double()
2177 fit_tf1.GetRange(lower_bound, upper_bound)
2178 title = fit_tf1.GetTitle()
2180 combined_formula = additional_stats_tf1.GetExpFormula().Data() +
'+' + fit_tf1.GetExpFormula().Data()
2181 combined_tf1 = ROOT.TF1(
"Combined", combined_formula, lower_bound.value, upper_bound.value)
2182 combined_tf1.SetTitle(title)
2185 chi2 = fit_tf1.GetChisquare()
2186 combined_tf1.SetChisquare(chi2)
2188 ndf = fit_tf1.GetNDF()
2189 combined_tf1.SetNDF(ndf)
2191 n_stats_parameters = additional_stats_tf1.GetNpar()
2193 cls.copy_tf1_parameters(additional_stats_tf1, combined_tf1)
2194 cls.copy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2199 def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0):
2200 """Copy the parameters of one TF1 to another.
2204 tf1_source : ROOT.TF1
2205 Take parameters from here
2206 tf1_target : ROOT.TF1
2208 offset : int, optional
2209 Index of the first target parameter to which to copy.
2211 n_parameters = tf1_source.GetNpar()
2214 lower_bound = ctypes.c_double()
2215 upper_bound = ctypes.c_double()
2217 for i_source
in range(n_parameters):
2218 parameter_name = tf1_source.GetParName(i_source)
2219 i_target = tf1_target.GetParNumber(parameter_name)
2223 for i_target
in range(tf1_target.GetNpar()):
2224 if parameter_name == tf1_target.GetParName(i_target):
2230 tf1_target.SetParameter(i_target,
2231 tf1_source.GetParameter(i_source))
2232 tf1_target.SetParError(i_target,
2233 tf1_source.GetParError(i_source))
2235 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2236 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2238 def attach_attributes(self):
2239 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2242 self.check = self.check
2244 self.contact = self.contact
2246 self.description = self.description
2249 self.xlabel = self.xlabel
2251 self.ylabel = self.ylabel
2253 self.title = self.title
2255 def set_maximum(self, maximum):
2256 """Sets the maximum of the vertical plotable range"""
2257 for histogram
in self.histograms:
2258 if isinstance(histogram, ROOT.TH1):
2259 histogram.SetMaximum(histogram.GetMaximum(maximum))
2261 histogram.SetMaximum(maximum)
2263 def set_minimum(self, minimum):
2264 """Sets the minimum of the vertical plotable range"""
2265 for histogram
in self.histograms:
2266 if isinstance(histogram, ROOT.TH1):
2267 histogram.SetMinimum(histogram.GetMinimum(minimum))
2269 histogram.SetMinimum(minimum)
2272 def set_tstyle(cls):
2273 """Set the style such that the additional stats entries are shown by the TBrowser"""
2274 belle2_validation_style_name = "belle2_validation_style"
2275 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2276 if not belle2_validation_tstyle:
2277 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2280 belle2_validation_tstyle.SetOptFit(opt_fit)
2283 belle2_validation_tstyle.SetOptStat(opt_stat)
2284 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2286 # belle2_validation_tstyle.SetLineStyleString(cls.very_sparse_dots_line_style_index, "4 2000")
2289 belle2_validation_tstyle.cd()
2293 """Simple test method"""
2294 ValidationPlot.set_tstyle()
2296 # Test a histogram plot with some nan and inf values
2297 normal_distributed_values = np.random.randn(1000)
2300 normal_distributed_values[i] = np.nan
2302 for i in range(10, 20):
2303 normal_distributed_values[i] = np.inf
2305 for i in range(20, 30):
2306 normal_distributed_values[i] = -np.inf
2308 validation_histogram = ValidationPlot('test_hist')
2309 validation_histogram.hist(normal_distributed_values)
2310 validation_histogram.title = 'A normal distribution'
2311 validation_histogram.xlabel = 'normal'
2312 validation_histogram.ylabel = 'frequency'
2313 validation_histogram.fit_gaus()
2315 # Test for a cumulated histogram - cumulation from left to right
2316 cumulated_histogram = ValidationPlot('test_cum_hist')
2317 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2318 cumulated_histogram.title = 'A cumulated normal distribution'
2319 cumulated_histogram.xlabel = 'normal'
2320 cumulated_histogram.ylabel = 'cdf'
2321 cumulated_histogram.show()
2323 # Test stacked histogram
2324 # Make a random selection of 40%
2325 stackby = np.random.binomial(1.0, 0.40, 1000)
2326 stacked_validation_histogram = ValidationPlot('test_stacked_hist')
2327 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2329 # Make a scatterplot with two species.
2330 x = np.random.randn(1000)
2331 y = 3 * np.random.randn(1000)
2332 ones = np.ones_like(x)
2335 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2336 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2338 stacked_validation_scatter = ValidationPlot('test_stacked_scatter')
2339 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2341 # Make a profile plot with the two species
2342 stacked_validation_profile = ValidationPlot('test_stacked_profile')
2343 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2345 # Make a two dimensional histogram with two species
2346 stacked_validation_hist2d = ValidationPlot('test_stacked_hist2d')
2347 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2349 # Test a profile with a diagonal fit
2350 x = np.linspace(-1, 1, 1000)
2353 diagonal_plot = ValidationPlot('test_diag')
2354 diagonal_plot.profile(x, y, bins=50)
2355 diagonal_plot.fit_line()
2357 # Test if cumulation works for profile plots
2358 cumulated_profile = ValidationPlot('test_cum_profile')
2359 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2361 tfile = ROOT.TFile('test.root', 'RECREATE')
2363 validation_histogram.write(tfile)
2365 with root_cd("expert") as tdirectory1:
2366 diagonal_plot.write(tdirectory1)
2367 cumulated_profile.write(tdirectory1)
2368 cumulated_histogram.write(tdirectory1)
2370 with root_cd("stacked") as tdirectory2:
2371 stacked_validation_histogram.write(tdirectory2)
2372 stacked_validation_scatter.write()
2373 stacked_validation_profile.write()
2374 stacked_validation_hist2d.write()
2378 tfile = ROOT.TFile('test.root')
2379 tBrowser = ROOT.TBrowser()
2380 tBrowser.BrowseObject(tfile)
2385if __name__ == '__main__':