31 """Getter for the logging.Logger instance of this module
36 Logger instance of this module
38 return logging.getLogger(__name__)
42 units_by_quantity_name = {
def get_unit(quantity_name):
    """Infers the unit of a quantity from its name.

    Assumes the standard Belle II unit system.

    Currently looks up the quantity string from units_by_quantity_name.

    Parameters
    ----------
    quantity_name : str
        Name of a quantity (E.g. pt, x, ...)

    Returns
    -------
    str or None
        The entry stored for the quantity name in units_by_quantity_name,
        or None if the name has no entry there.
    """
    # dict.get already yields None for names without an entry, and the
    # result has to be handed back to the caller (compose_axis_label uses it).
    return units_by_quantity_name.get(quantity_name)
def compose_axis_label(quantity_name, unit=None):
    """Formats a quantity name and a unit to a label for a plot axis.

    If the unit is not given, it is inferred from the quantity name
    by the get_unit function.

    Parameters
    ----------
    quantity_name : str
        Name of the quantity to be displayed at the axis
    unit : str, optional
        The unit of the quantity. Defaults to get_unit(quantity_name)

    Returns
    -------
    str
        "<quantity_name> (<unit>)" if a unit is given or could be inferred,
        otherwise the bare quantity name.
    """
    if unit is None:
        unit = get_unit(quantity_name)

    if unit is None:
        # No unit available for this quantity: label with the name only.
        return quantity_name

    return '%s (%s)' % (quantity_name, unit)
def get1DBinningFromReference(name, refFileName):
    """Returns nbins, lower bound, upper bound for the TH1 / TProfile with name "name" found in the file "refFileName"

    @param name : name of the TH1 object to be looked for in the file
    @param refFileName : name of the reference file where the object is searched for

    @return int nbin, float xmin, float xmax of the TH1.
            All three are None if no reference file name is given, the file
            could not be opened or no TH1 with that name was found in it.
    """
    nbins = None
    x_min = None
    x_max = None

    # Nothing to look up without a reference file.
    if refFileName is None or refFileName == "":
        return nbins, x_min, x_max

    # Remember the current ROOT directory before opening the reference file
    # NOTE(review): assumes opening a TFile changes the current directory,
    # which is why it is restored below - confirm against the ROOT version in use.
    oldDirectory = ROOT.gROOT.CurrentDirectory().load()

    tfile = ROOT.TFile(refFileName)
    try:
        if tfile.IsOpen():
            objptr = tfile.Get(name)
            if objptr and objptr.InheritsFrom("TH1"):
                nbins = objptr.GetNbinsX()
                x_min = objptr.GetXaxis().GetXmin()
                x_max = objptr.GetXaxis().GetXmax()
            else:
                basf2.B2WARNING('Requested object with name: ' + name +
                                ' not found in file: ' + refFileName +
                                " (or not a TH1)")
        else:
            basf2.B2WARNING('Requested file: ' + refFileName +
                            ' could not be opened')
    finally:
        # Always release the file and restore the previous directory,
        # also if reading the histogram raised.
        tfile.Close()
        if oldDirectory:
            oldDirectory.cd()

    return nbins, x_min, x_max
157 StatsEntry = ROOT.TParameter(float)
162 """Class for generating a validation plot for the Belle II validation page.
164 Typically it generates plots from values stored in numpy arrays and feeds them into
165 plot ROOT classes for storing them.
167 It implements an automatic binning procedure based on the rice rule and
168 robust z score outlier detection.
170 It also keeps track of additional statistics typically neglected by ROOT such as a count
171 for the non-finite values such as NaN, +Inf, -Inf.
173 The special attributes for the Belle II validation page like
178 are exposed as properties of this class.
182 very_sparse_dots_line_style_index = 28
186 """Constructor of the ValidationPlot
191 A unique name to be used as the name of the ROOT object to be generated
193 referenceFileName : str
194 name of a reference file. If set the code will try to get the histogram or profile
195 from that file and determine the number of bins and upper and lower bound
196 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
200 self.
namename = root_save_name(name)
248 outlier_z_score=None,
249 include_exceptionals=True,
250 allow_discrete=False,
251 cumulation_direction=None,
253 """Fill the plot with a one dimensional histogram."""
258 if n
is not None and xmin
is not None and xmax
is not None:
263 th1_factory = ROOT.TH1D
271 lower_bound=lower_bound,
272 upper_bound=upper_bound,
273 outlier_z_score=outlier_z_score,
274 include_exceptionals=include_exceptionals,
275 allow_discrete=allow_discrete,
276 cumulation_direction=cumulation_direction)
294 outlier_z_score=None,
295 include_exceptionals=True,
296 allow_discrete=False,
297 cumulation_direction=None,
301 """Fill the plot with a one dimensional profile of one variable over another."""
309 if n
is not None and xmin
is not None and xmax
is not None:
314 th1_factory = ROOT.TProfile
316 if gaus_z_score
is None:
323 lower_bound=lower_bound,
324 upper_bound=upper_bound,
325 outlier_z_score=outlier_z_score,
326 include_exceptionals=include_exceptionals,
327 allow_discrete=allow_discrete,
328 cumulation_direction=cumulation_direction)
332 self.
hist2dhist2d(xs, ys=ys, weights=weights, stackby=stackby,
334 lower_bound=(lower_bound,
None),
335 upper_bound=(upper_bound,
None),
336 outlier_z_score=(outlier_z_score, outlier_z_score),
337 include_exceptionals=(include_exceptionals,
True),
338 allow_discrete=(allow_discrete,
False),
345 name=histogram.GetName()[1:],
346 z_score=gaus_z_score)
347 profiles.append(profile)
349 self.
plotplot = self.
create_stackcreate_stack(profiles, name=self.
plotplot.GetName()[1:], reverse_stack=
False)
352 self.
y_logy_log =
True
354 if y_binary
or self.
is_binaryis_binary(ys):
362 histogram.SetMinimum(min_y)
363 histogram.SetMaximum(1.05)
365 self.
plotplot.SetMinimum(min_y)
366 self.
plotplot.SetMaximum(1.05)
374 lower_bound=(
None,
None),
375 upper_bound=(
None,
None),
376 outlier_z_score=(
None,
None),
377 include_exceptionals=(
True,
True),
380 """Fill the plot with a (unbinned) two dimensional scatter plot"""
384 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
385 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
386 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
387 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
391 lower_bound=x_lower_bound,
392 upper_bound=x_upper_bound,
393 outlier_z_score=x_outlier_z_score,
394 include_exceptionals=x_include_exceptionals
399 lower_bound=y_lower_bound,
400 upper_bound=y_upper_bound,
401 outlier_z_score=y_outlier_z_score,
402 include_exceptionals=y_include_exceptionals
405 graph = ROOT.TGraph()
407 graph.SetName(self.
namename)
408 graph.SetMarkerStyle(6)
409 graph.GetHistogram().SetOption(
"AP")
414 graph.SetLineColorAlpha(color_index, 0)
418 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
419 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
433 lower_bound=(
None,
None),
434 upper_bound=(
None,
None),
435 outlier_z_score=(
None,
None),
436 include_exceptionals=(
True,
True),
439 """Fill the plot with a (unbinned) two dimensional scatter plot
440 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
449 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
450 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
451 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
452 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
456 lower_bound=x_lower_bound,
457 upper_bound=x_upper_bound,
458 outlier_z_score=x_outlier_z_score,
459 include_exceptionals=x_include_exceptionals
464 lower_bound=y_lower_bound,
465 upper_bound=y_upper_bound,
466 outlier_z_score=y_outlier_z_score,
467 include_exceptionals=y_include_exceptionals
470 graph = ROOT.TGraphErrors()
472 graph.SetName(self.
namename)
473 graph.GetHistogram().SetOption(
"A")
475 graph.SetMarkerColor(4)
476 graph.SetMarkerStyle(21)
479 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
480 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
496 lower_bound=(
None,
None),
497 upper_bound=(
None,
None),
498 outlier_z_score=(
None,
None),
499 include_exceptionals=(
True,
True),
500 allow_discrete=(
False,
False),
503 """Fill the plot with a two dimensional histogram"""
507 if quantiles
is not None:
508 name =
"_" + self.
namename
511 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
512 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
513 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
514 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
515 x_allow_discrete, y_allow_discrete = self.
unpack_2d_paramunpack_2d_param(allow_discrete)
517 if quantiles
is not None:
518 y_include_exceptionals =
True
519 y_allow_discrete =
False
524 lower_bound=x_lower_bound,
525 upper_bound=x_upper_bound,
526 outlier_z_score=x_outlier_z_score,
527 include_exceptionals=x_include_exceptionals,
528 allow_discrete=x_allow_discrete)
533 lower_bound=y_lower_bound,
534 upper_bound=y_upper_bound,
535 outlier_z_score=y_outlier_z_score,
536 include_exceptionals=y_include_exceptionals,
537 allow_discrete=y_allow_discrete)
539 n_x_bins = len(x_bin_edges) - 1
540 n_y_bins = len(y_bin_edges) - 1
546 histogram = ROOT.TH2D(name,
554 get_logger().info(
"Scatter plot %s is discrete in x.", name)
555 x_taxis = histogram.GetXaxis()
556 for i_x_bin, x_bin_label
in enumerate(x_bin_labels):
557 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
561 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
565 get_logger().info(
"Scatter plot %s is discrete in y.", name)
566 y_taxis = histogram.GetYaxis()
567 for i_y_bin, y_bin_label
in enumerate(y_bin_labels):
568 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
572 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
575 self.
createcreate(histogram, xs, ys=ys, weights=weights, stackby=stackby)
577 if quantiles
is not None:
581 for quantile
in quantiles:
582 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] +
'_' + str(quantile))
585 x_taxis = histogram.GetXaxis()
586 new_x_taxis = profile.GetXaxis()
587 for i_bin
in range(x_taxis.GetNbins() + 2):
588 label = x_taxis.GetBinLabel(i_bin)
590 new_x_taxis.SetBinLabel(i_bin, label)
593 epsilon = sys.float_info.epsilon
594 for i_bin
in range(0, profile.GetNbinsX() + 2):
595 profile.SetBinError(i_bin, epsilon)
597 profiles.append(profile)
600 self.
plotplot = self.
create_stackcreate_stack(profiles, name=self.
plotplot.GetName()[1:], reverse_stack=
False, force_graph=
True)
605 x_taxis = histogram.GetXaxis()
606 x_bin_edges = array.array(
"d", list(range(len(x_bin_labels) + 1)))
607 x_taxis.Set(n_x_bins, x_bin_edges)
612 x_taxis = histogram.GetXaxis()
613 y_bin_edges = array.array(
"d", list(range(len(y_bin_labels) + 1)))
614 y_taxis.Set(n_y_bins, y_bin_edges)
619 """Fit a Gaussian bell curve to the central portion of a one dimensional histogram
621 The fit is applied to the central mean +- z_score * std interval of the histogram,
622 such that it is less influenced by non-Gaussian tails further away than the given z score.
624 @param float z_score number of sigmas to include from the mean value of the histogram.
631 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
633 if not isinstance(plot, ROOT.TH1D):
634 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
638 fit_tf1 = ROOT.TF1(
"Fit", formula)
639 fit_tf1.SetTitle(title)
640 fit_tf1.SetParName(0,
"n")
641 fit_tf1.SetParName(1,
"mean")
642 fit_tf1.SetParName(2,
"std")
644 n = histogram.GetSumOfWeights()
645 mean = histogram.GetMean()
646 std = histogram.GetStdDev()
648 fit_tf1.SetParameter(0, n)
649 fit_tf1.SetParameter(1, mean)
650 fit_tf1.SetParameter(2, std)
653 return self.
fitfit(fit_tf1,
658 """Fit a general line to a one dimensional histogram"""
661 fit_tf1 = ROOT.TF1(
"Fit", formula)
662 fit_tf1.SetTitle(title)
663 fit_tf1.SetParName(0,
"slope")
664 fit_tf1.SetParName(1,
"intercept")
665 self.
fitfit(fit_tf1,
'M')
668 """Fit a constant function to a one dimensional histogram"""
671 fit_tf1 = ROOT.TF1(
"Fit", formula)
672 fit_tf1.SetTitle(title)
673 fit_tf1.SetParName(0,
"intercept")
674 self.
fitfit(fit_tf1,
'M')
677 """Fit a diagonal line through the origin to a one dimensional histogram"""
680 fit_tf1 = ROOT.TF1(
"Fit", formula)
681 fit_tf1.SetTitle(title)
682 fit_tf1.SetParName(0,
"slope")
683 self.
fitfit(fit_tf1,
'M')
685 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
686 """Fit a user defined function to a one dimensional histogram
691 Formula string or TF1 to be fitted. See the TF1 constructors for what is a valid formula
693 Options string to be used in the fit. See TH1::Fit()
695 Lower bound of the range to be fitted
697 Upper bound of the range to be fitted
701 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
703 if not isinstance(plot, ROOT.TH1D):
704 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
708 xaxis = histogram.GetXaxis()
709 n_bins = xaxis.GetNbins()
710 hist_lower_bound = xaxis.GetBinLowEdge(1)
711 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
713 if z_score
is not None:
714 mean = histogram.GetMean()
715 std = histogram.GetStdDev()
717 if lower_bound
is None:
718 lower_bound = mean - z_score * std
720 if upper_bound
is None:
721 upper_bound = mean + z_score * std
724 if isinstance(formula, ROOT.TF1):
726 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
728 fit_tf1 = ROOT.TF1(
"Fit",
732 get_logger().info(
'Fitting with %s', fit_tf1.GetExpFormula())
735 if lower_bound
is None or lower_bound < hist_lower_bound:
736 lower_bound = hist_lower_bound
737 if upper_bound
is None or upper_bound > hist_upper_bound:
738 upper_bound = hist_upper_bound
742 if 'N' not in options:
745 fit_res = histogram.Fit(fit_tf1, options +
"S",
"", lower_bound, upper_bound)
755 raise ValueError(
"Can not show a validation plot that has not been filled.")
758 """Write the plot to file
762 tdirectory : ROOT.TDirectory, optional
763 ROOT directory to which the plot should be written.
764 If omitted write to the current directory
766 if not self.
plotplot:
767 raise ValueError(
"Can not write a validation plot that has not been filled.")
769 with root_cd(tdirectory):
770 ValidationPlot.set_tstyle()
772 self.
plotplot.Write()
775 meta_options = [
"nostats"]
779 meta_options.append(
"expert")
781 meta_options.append(
"shifter")
785 meta_options.append(
"pvalue-error={}".format(self.
pvalue_errorpvalue_error))
787 meta_options.append(
"pvalue-warn={}".format(self.
pvalue_warnpvalue_warn))
791 meta_options.append(
"logy")
793 meta_options_str =
",".join(meta_options)
796 histogram.GetListOfFunctions().Add(ROOT.TNamed(
'MetaOptions', meta_options_str))
801 """Getter method if a plot is marked as expert plot"""
806 """Getter for the plot title"""
811 """Setter for the plot title"""
814 self.
plotplot.SetTitle(title)
816 histogram.SetTitle(title)
820 """Getter for the axis label at the x axis"""
825 """Setter for the axis label at the x axis"""
828 histogram.GetXaxis().SetTitle(xlabel)
832 """Getter for the axis label at the y axis"""
837 """Setter for the axis label at the y axis"""
840 histogram.GetYaxis().SetTitle(ylabel)
844 """Getter for the contact email address to be displayed on the validation page"""
849 """Setter for the contact email address to be displayed on the validation page"""
852 found_obj = histogram.FindObject(
'Contact')
854 tnamed = ROOT.TNamed(
"Contact", contact)
855 histogram.GetListOfFunctions().Add(tnamed)
856 found_obj = histogram.FindObject(
'Contact')
857 found_obj.SetTitle(contact)
861 """Getter for the description to be displayed on the validation page"""
866 """Setter for the description to be displayed on the validation page"""
869 found_obj = histogram.FindObject(
'Description')
871 tnamed = ROOT.TNamed(
"Description", description)
872 histogram.GetListOfFunctions().Add(tnamed)
873 found_obj = histogram.FindObject(
'Description')
874 found_obj.SetTitle(description)
878 """Getter for the check to be displayed on the validation page"""
883 """Setter for the check to be displayed on the validation page"""
886 found_obj = histogram.FindObject(
'Check')
888 tnamed = ROOT.TNamed(
"Check", check)
889 histogram.GetListOfFunctions().Add(tnamed)
890 found_obj = histogram.FindObject(
'Check')
891 found_obj.SetTitle(check)
898 """Unpacks a function parameter for the two dimensional plots.
900 If it is a pair the first parameter shall apply to the x coordinate
901 the second to the y coordinate. In this case the pair is returned as two values
903 If something else is given then it is assumed that this parameter should equally apply
904 to both coordinates. In this case the same value is returned twice as a pair.
908 param : pair or single value
909 Function parameter for a two dimensional plot
914 A pair of values being the parameter for the x coordinate and
915 the y coordinate respectively
919 x_param, y_param = param
923 return x_param, y_param
927 """Determine if the data consists of boolean values"""
928 return statistics.is_binary_series(xs)
932 """Determine if the data consists of discrete values"""
933 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
937 """Find exceptionally frequent values
947 A list of the found exceptional values.
949 return statistics.rice_exceptional_values(xs)
953 """Does an estimation of mean and standard deviation robust against outliers.
963 Pair of mean and standard deviation
965 x_mean = statistics.truncated_mean(xs)
966 x_std = statistics.trimmed_std(xs)
971 """Formats a value to be placed at a tick on an axis."""
972 if np.isfinite(value)
and value == np.round(value):
973 return str(int(value))
975 formated_value =
"{:.5g}".format(value)
978 if len(formated_value) > 8:
979 formated_value =
"{:.3e}".format(value)
980 return formated_value
991 outlier_z_score=None,
992 include_exceptionals=True,
993 allow_discrete=False,
994 cumulation_direction=None):
995 """Combined factory method for creating a one dimensional histogram or a profile plot."""
999 xs = np.array(xs, copy=
False)
1002 ys = np.array(ys, copy=
False)
1004 if weights
is not None:
1005 weights = np.array(weights, copy=
False)
1010 lower_bound=lower_bound,
1011 upper_bound=upper_bound,
1012 outlier_z_score=outlier_z_score,
1013 include_exceptionals=include_exceptionals,
1014 allow_discrete=allow_discrete)
1016 n_bins = len(bin_edges) - 1
1019 histogram = th1_factory(name,
'', n_bins, bin_edges)
1022 get_logger().info(
"One dimensional plot %s is discrete in x.", name)
1023 x_taxis = histogram.GetXaxis()
1024 for i_bin, bin_label
in enumerate(bin_labels):
1025 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1029 bin_width = bin_edges[1] - bin_edges[0]
1032 self.
createcreate(histogram,
1037 cumulation_direction=cumulation_direction,
1044 x_taxis = histogram.GetXaxis()
1045 bin_edges = array.array(
"d", list(range(len(bin_labels) + 1)))
1046 x_taxis.Set(n_bins, bin_edges)
1054 cumulation_direction=None,
1055 reverse_stack=None):
1056 """Create histograms from a template, possibly stacked"""
1061 histogram = histogram_template
1062 self.
fill_intofill_into(histogram, xs, ys, weights=weights)
1063 if cumulation_direction
is not None:
1064 histogram = self.
cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1066 histograms.append(histogram)
1070 stackby = np.array(stackby, copy=
False)
1071 name = histogram_template.GetName()
1078 groupby_label=
"stack")
1080 if cumulation_direction
is not None:
1081 histograms = [self.
cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1082 for histogram
in histograms]
1084 plot = self.
create_stackcreate_stack(histograms, name=name +
"_stacked", reverse_stack=reverse_stack)
1087 self.
plotplot = plot
1092 """Create a stack of histograms"""
1093 if len(histograms) == 1:
1094 plot = histograms[0]
1096 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph))
or force_graph:
1097 plot = ROOT.TMultiGraph()
1099 plot = ROOT.THStack()
1106 for histogram
in reversed(histograms):
1107 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1109 plot.Add(histogram,
"APZ")
1113 for histogram
in histograms:
1114 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1116 plot.Add(histogram,
"APZ")
1124 """Extract errors from a TProfile histogram and create a TGraph from these"""
1125 if isinstance(tprofile, ROOT.TGraph):
1128 x_taxis = tprofile.GetXaxis()
1129 n_bins = x_taxis.GetNbins()
1132 bin_ids_without_underflow = list(range(1, n_bins + 1))
1134 bin_centers = np.array([x_taxis.GetBinCenter(i_bin)
for i_bin
in bin_ids_without_underflow])
1136 bin_centers = np.abs(bin_centers)
1137 bin_widths = np.array([x_taxis.GetBinWidth(i_bin)
for i_bin
in bin_ids_without_underflow])
1138 bin_x_errors = bin_widths / 2.0
1141 bin_contents = np.array([tprofile.GetBinContent(i_bin)
for i_bin
in bin_ids_without_underflow])
1142 bin_y_errors = np.array([tprofile.GetBinError(i_bin)
for i_bin
in bin_ids_without_underflow])
1144 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1146 tgrapherrors.GetHistogram().SetOption(
"APZ")
1148 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1149 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1152 for tobject
in tprofile.GetListOfFunctions():
1153 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1156 cls.
add_stats_entryadd_stats_entry(tgrapherrors,
'count', tprofile.GetEntries())
1158 stats_values = np.array([np.nan] * 6)
1159 tprofile.GetStats(stats_values)
1161 sum_w = stats_values[0]
1163 sum_wx = stats_values[2]
1164 sum_wx2 = stats_values[3]
1165 sum_wy = stats_values[4]
1166 sum_wy2 = stats_values[5]
1174 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1182 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1186 tgrapherrors.GetCovariance())
1190 tgrapherrors.GetCorrelationFactor())
1200 groupby_label="group"):
1201 """Fill data into similar histograms in groups indicated by a groupby array"""
1204 unique_groupbys = np.unique(groupbys)
1205 name = histogram_template.GetName()
1207 for i_value, value
in enumerate(unique_groupbys):
1209 indices_for_value = np.isnan(groupbys)
1211 indices_for_value = groupbys == value
1214 histogram_for_value = histogram_template.Clone(name +
'_' + str(value))
1215 i_root_color = i_value + 1
1217 self.
set_colorset_color(histogram_for_value, i_root_color)
1220 self.
add_stats_entryadd_stats_entry(histogram_for_value, groupby_label, value)
1222 self.
fill_intofill_into(histogram_for_value,
1226 filter=indices_for_value)
1228 histograms.append(histogram_for_value)
1233 """Set the color of the ROOT object.
1235 By default the line color of a TGraph should be invisible, so do not change it
1236 For other objects set the marker and the line color
1240 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1241 Object of which the color should be set.
1243 Color index of the ROOT color table
1245 if isinstance(tobject, ROOT.TGraph):
1246 tobject.SetMarkerColor(root_i_color)
1248 tobject.SetLineColor(root_i_color)
1249 tobject.SetMarkerColor(root_i_color)
1251 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1252 """Fill the data into the plot object"""
1253 if isinstance(plot, ROOT.TGraph):
1255 raise ValueError(
"ys are required for filling a graph")
1257 elif isinstance(plot, ROOT.TGraphErrors):
1259 raise ValueError(
"ys are required for filling a graph error")
1263 self.
fill_into_th1fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1266 """fill point values and error of the x and y axis into the graph"""
1268 assert(len(xs[0]) == len(ys[0]))
1270 graph.Set(len(xs[0]))
1272 for i
in range(len(xs[0])):
1273 graph.SetPoint(i, xs[0][i], ys[0][i])
1274 graph.SetPointError(i, xs[1][i], ys[1][i])
1277 """Fill the data into a TGraph"""
1281 filter =
slice(
None)
1291 if x_n_data > max_n_data
or y_n_data > max_n_data:
1292 get_logger().warning(
"Number of points in scatter graph %s exceed limit %s" %
1293 (self.
namename, max_n_data))
1295 get_logger().warning(
"Cropping %s" % max_n_data)
1297 xs = xs[0:max_n_data]
1298 ys = ys[0:max_n_data]
1300 x_axis = graph.GetXaxis()
1301 y_axis = graph.GetYaxis()
1303 x_lower_bound = x_axis.GetXmin()
1304 x_upper_bound = x_axis.GetXmax()
1306 y_lower_bound = y_axis.GetXmin()
1307 y_upper_bound = y_axis.GetXmax()
1309 x_underflow_indices = xs < x_lower_bound
1310 x_overflow_indices = xs > x_upper_bound
1312 y_underflow_indices = ys < y_lower_bound
1313 y_overflow_indices = ys > y_upper_bound
1315 plot_indices = ~(np.isnan(xs) |
1316 x_underflow_indices |
1317 x_overflow_indices |
1319 y_underflow_indices |
1322 n_plot_data = np.sum(plot_indices)
1323 plot_xs = xs[plot_indices]
1324 plot_ys = ys[plot_indices]
1326 graph.Set(int(n_plot_data))
1327 for i, (x, y)
in enumerate(zip(plot_xs, plot_ys)):
1328 graph.SetPoint(i, x, y)
1330 self.
add_stats_entryadd_stats_entry(graph,
'count', np.sum(np.isfinite(xs)))
1335 x_n_underflow = np.sum(x_underflow_indices)
1337 self.
add_stats_entryadd_stats_entry(graph,
'x underf.', x_n_underflow)
1339 x_n_overflow = np.sum(x_overflow_indices)
1343 y_n_underflow = np.sum(y_underflow_indices)
1345 self.
add_stats_entryadd_stats_entry(graph,
'y underf.', y_n_underflow)
1347 y_n_overflow = np.sum(y_overflow_indices)
1357 self.
add_stats_entryadd_stats_entry(graph,
'cov', graph.GetCovariance())
1358 self.
add_stats_entryadd_stats_entry(graph,
'corr', graph.GetCorrelationFactor())
1361 """Fill the histogram blocking non finite values
1365 histogram : ROOT.TH1
1366 The histogram to be filled
1367 xs : numpy.ndarray (1d)
1368 Data for the first axes
1369 ys : numpy.ndarray (1d), optional
1370 Data for the second axes
1371 weights : numpy.ndarray (1d), optional
1372 Weight of the individual points. Defaults to one for each
1373 filter : numpy.ndarray, optional
1374 Boolean index array indicating which entries shall be taken.
1378 filter =
slice(
None)
1383 finite_filter = np.isfinite(xs)
1389 finite_filter &= np.isfinite(ys)
1392 xs = xs[finite_filter]
1393 weights = np.ones_like(xs)
1395 weights = weights[filter]
1397 finite_filter &= np.isfinite(weights)
1398 xs = xs[finite_filter]
1399 weights[finite_filter]
1402 ys = ys[finite_filter]
1407 except AttributeError:
1408 Fill = histogram.Fill
1410 fill = np.frompyfunc(Fill, 2, 1)
1411 fill(xs.astype(np.float64, copy=
False),
1412 weights.astype(np.float64, copy=
False))
1414 fill = np.frompyfunc(Fill, 3, 1)
1415 fill(xs.astype(np.float64, copy=
False),
1416 ys.astype(np.float64, copy=
False),
1417 weights.astype(np.float64, copy=
False))
1421 xs = xs.astype(np.float64, copy=
False)
1422 weights = weights.astype(np.float64, copy=
False)
1425 histogram.FillN(n, xs, weights)
1427 basf2.B2WARNING(
"No values to be filled into histogram: " + self.
namename)
1431 xs = xs.astype(np.float64, copy=
False)
1432 ys = ys.astype(np.float64, copy=
False)
1433 weights = weights.astype(np.float64, copy=
False)
1436 histogram.FillN(n, xs, ys, weights)
1438 basf2.B2WARNING(
"No values to be filled into histogram: " + self.
namename)
1444 """ Extracts the counts of non finite floats from a series
1445 and adds them as additional statistics to the histogram.
1449 histogram : derived from ROOT.TH1 or ROOT.TGraph
1450 Something having a GetListOfFunctions method that
1452 A label for the data series to be prefixed to the entries.
1453 xs : numpy.ndarray (1d)
1454 Data from which the non-finite floats should be counted.
1456 n_nans = np.isnan(xs).sum()
1460 n_positive_inf = np.sum(xs == np.inf)
1461 if n_positive_inf > 0:
1462 cls.
add_stats_entryadd_stats_entry(histogram, name +
' pos inf', n_positive_inf)
1464 n_negative_inf = np.sum(xs == -np.inf)
1465 if n_negative_inf > 0:
1466 cls.
add_stats_entryadd_stats_entry(histogram, name +
' neg inf', n_negative_inf)
1470 """Add a new additional statistics to the histogram.
1474 histogram : derived from ROOT.TH1 or ROOT.TGraph
1475 Something having a GetListOfFunctions method that holds the additional statistics
1477 Label of the statistic
1479 Value of the statistic
1481 stats_entry = StatsEntry(str(label), float(value))
1482 histogram.GetListOfFunctions().Add(stats_entry)
1487 """Get the additional statistics from the histogram and return them as a dict.
1491 histogram : derived from ROOT.TH1 or ROOT.TGraph
1492 Something having a GetListOfFunctions method that holds the additional statistics
1496 collections.OrderedDict
1497 A map of labels to values for the additional statistics
1499 additional_stats = collections.OrderedDict()
1500 for tobject
in histogram.GetListOfFunctions():
1501 if isinstance(tobject, StatsEntry):
1502 stats_entry = tobject
1503 label = stats_entry.GetName()
1504 value = stats_entry.GetVal()
1505 additional_stats[label] = value
1506 return additional_stats
1510 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1513 y_taxis = th2.GetYaxis()
1514 th2_lower_bound = y_taxis.GetXmin()
1515 th2_upper_bound = y_taxis.GetXmax()
1516 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1517 n_y_bins = y_taxis.GetNbins()
1519 y_mean = th2.GetMean(2)
1520 y_std = th2.GetStdDev(2)
1521 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1522 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1523 fit_height = fit_upper_bound - fit_lower_bound
1525 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1527 fit_lower_bound = th2_lower_bound
1528 fit_upper_bound = th2_upper_bound
1529 fit_height = fit_upper_bound - fit_lower_bound
1530 required_n_bins_inslice_filled = n_y_bins / 1.61
1533 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1535 fit_tf1 = ROOT.TF1(
"Fit",
"gaus", fit_lower_bound, fit_upper_bound)
1536 fit_tf1.SetParName(0,
"n")
1537 fit_tf1.SetParName(1,
"mean")
1538 fit_tf1.SetParName(2,
"std")
1542 param_fit_th1s = ROOT.TObjArray()
1543 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1544 int(required_n_bins_inslice_filled),
1545 fit_options, param_fit_th1s)
1547 th1_means = param_fit_th1s.At(1)
1548 th1_means.SetName(name)
1549 th1_means.SetTitle(th2.GetTitle())
1554 x_taxis = th2.GetXaxis()
1555 new_x_taxis = th1_means.GetXaxis()
1556 for i_bin
in range(x_taxis.GetNbins() + 2):
1557 label = x_taxis.GetBinLabel(i_bin)
1559 new_x_taxis.SetBinLabel(i_bin, label)
1562 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1563 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1564 data_height = data_upper_bound - data_lower_bound
1566 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1567 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1569 th1_means.SetMinimum(plot_lower_bound)
1570 th1_means.SetMaximum(plot_upper_bound)
1575 def cumulate(cls, histogram, cumulation_direction=None):
1576 """Cumulates the histogram inplace.
1580 histogram : ROOT.TH1 or ROOT.TProfile
1581 Filled histogram to be cumulated
1582 cumulation_direction : int, optional
1583 Direction is indicated by the sign.
1584 Positive means from left to right, negative means from right to left.
1585 If no cumulation direction is given return the histogram as is.
1590 Cumulated histogram potentially altered inplace.
1592 if not cumulation_direction:
1595 cumulate_backward = cumulation_direction < 0
1596 cumulate_forward =
not cumulate_backward
1598 if isinstance(histogram, ROOT.TH2):
1599 raise ValueError(
"Cannot cumulate a two dimensional histogram.")
1601 if isinstance(histogram, ROOT.TH3):
1602 raise ValueError(
"Cannot cumulate a three dimensional histogram.")
1604 if not isinstance(histogram, ROOT.TH1):
1605 raise ValueError(
"Can only cumulate a one dimensional histogram.")
1607 if isinstance(histogram, ROOT.TProfile):
1608 tprofile = histogram
1611 tgraph.SetName(tprofile.GetName())
1613 n_bins = histogram.GetNbinsX()
1615 cumulated_content = 0.0
1616 cumulated_entries = 0
1620 i_bins = list(range(0, n_bins + 2))
1621 if not cumulate_forward:
1622 i_bins = reversed(i_bins)
1624 for i_bin
in i_bins:
1626 bin_content = tprofile.GetBinContent(i_bin)
1627 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1628 bin_std = tprofile.GetBinError(i_bin)
1630 if bin_entries != 0:
1631 cumulated_content = (
1632 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1633 (cumulated_entries + bin_entries)
1637 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1638 (cumulated_entries + bin_entries)
1641 cumulated_entries = cumulated_entries + bin_entries
1646 if i_point >= 0
and i_point < n_points:
1647 x = tgraph.GetX()[i_point]
1651 tgraph.SetPoint(i_point, x, cumulated_content)
1653 x_error = tgraph.GetErrorX(i_point)
1654 tgraph.SetPointError(i_point, x_error, cumulated_std)
1659 n_bins = histogram.GetNbinsX()
1660 cumulated_content = 0.0
1662 i_bins = list(range(0, n_bins + 2))
1663 if not cumulate_forward:
1664 i_bins = reversed(i_bins)
1666 for i_bin
in i_bins:
1667 bin_content = histogram.GetBinContent(i_bin)
1668 cumulated_content += bin_content
1669 histogram.SetBinContent(i_bin, cumulated_content)
1679 outlier_z_score=None,
1680 include_exceptionals=True,
1681 allow_discrete=False):
1682 """Deducing bin edges from a data series.
1686 xs : numpy.ndarray (1d)
1687 Data point for which a binning should be found.
1688 stackbys : numpy.ndarray (1d)
1689 Categories of the data points to be distinguishable
1690 bins : list(float) or int or None, optional
1691 Preset bin edges or preset number of desired bins.
1692 The default, None, means the bound should be extracted from data.
1693 The Rice rule is used to determine the number of bins.
1694 If a list of floats is given, return them immediately.
1695 lower_bound : float or None, optional
1696 Preset lower bound of the binning range.
1697 The default, None, means the bound should be extracted from data.
1698 upper_bound : float or None, optional
1699 Preset upper bound of the binning range.
1700 The default, None, means the bound should be extracted from data.
1701 outlier_z_score : float or None, optional
1702 Threshold z-score of outlier detection.
1703 The default, None, means no outlier detection.
1704 include_exceptionals : bool, optional
1705 If the outlier detection is active this switch indicates,
1706 if values detected as exceptionally frequent shall be included
1707 nevertheless into the binning range. Default is True,
1708 which means exceptionally frequent values are included
1709 even if they are detected as outliers.
1713 np.array (1d), list(str)
1714 Pair of bin edges and labels deduced from the series.
1715 Second element is None if the series is not detected as discrete.
1717 debug = get_logger().debug
1718 debug(
'Determine binning for plot named %s', self.
namename)
1724 elif isinstance(bins, collections.Iterable):
1728 bin_edges = array.array(
'd', bin_edges)
1730 return bin_edges, bin_labels
1741 message =
'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1742 raise ValueError(message)
1745 xs = np.array(xs, copy=
False)
1749 debug(
'Discrete binning values encountered')
1750 finite_xs = xs[np.isfinite(xs)]
1751 unique_xs = np.unique(finite_xs)
1754 if lower_bound
is None:
1755 if len(unique_xs) == 0:
1756 if upper_bound
is None:
1759 lower_bound = upper_bound - 1
1761 lower_bound = np.min(unique_xs)
1763 unique_xs = unique_xs[unique_xs >= lower_bound]
1765 if upper_bound
is None:
1766 if len(unique_xs) == 0:
1767 upper_bound = lower_bound + 1
1769 upper_bound = np.min(unique_xs)
1771 unique_xs = unique_xs[unique_xs <= upper_bound]
1774 n_bins = len(unique_xs)
or 1
1776 if len(unique_xs) > 0
and n_bins >= len(unique_xs):
1778 bin_edges = array.array(
'd', unique_xs)
1781 bin_edges.append(bin_edges[-1] + 1)
1782 return bin_edges, bin_labels
1791 debug(
'Lower bound %s', lower_bound)
1792 debug(
'Upper bound %s', upper_bound)
1793 debug(
'N bins %s', n_bins)
1799 lower_bound=lower_bound,
1800 upper_bound=upper_bound,
1801 outlier_z_score=outlier_z_score,
1802 include_exceptionals=include_exceptionals)
1804 n_bins, lower_bound, upper_bound = bin_range
1806 n_bin_edges = n_bins + 1
1807 if lower_bound != upper_bound:
1809 debug(
"Creating flat distribution binning")
1810 precentiles = np.linspace(0.0, 100.0, n_bin_edges)
1811 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], precentiles))
1815 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1820 bin_edges[0] = lower_bound
1821 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1822 debug(
'Bins %s', bin_edges)
1826 bin_edges = [lower_bound, upper_bound + 1]
1829 bin_edges = array.array(
'd', bin_edges)
1830 debug(
'Bins %s for %s', bin_edges, self.
namename)
1831 return bin_edges,
None
1839 outlier_z_score=None,
1840 include_exceptionals=True):
1841 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1842 estimating the values that are not given.
1844 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1845 The z-score is calculated as (x - x_mean) / x_std. To be robust against outliers, the necessary
1846 mean and std deviation are based on a truncated mean and a trimmed std calculated from the
1847 interquartile range (IQR).
1849 If additionally include_exceptionals is true, the method tries to find exceptional values in the series
1850 and always include them in the range if it finds any.
1851 Exceptional values are exact values that appear often in the series for whatever reason.
1852 Possible reasons include
1853 * Internal / default values
1854 * Failed evaluation conditions
1856 which should not be cropped away automatically if you are looking at the quality of your data.
1860 xs : numpy.ndarray (1d)
1861 Data point for which a binning should be found.
1862 stackbys : numpy.ndarray (1d)
1863 Categories of the data points to be distinguishable
1864 n_bins : int or None, optional
1865 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1866 The Rice rule is used to determine the number of bins.
1867 lower_bound : float or None, optional
1868 Preset lower bound of the binning range.
1869 The default, None, means the bound should be extracted from data.
1870 upper_bound : float or None, optional
1871 Preset upper bound of the binning range.
1872 The default, None, means the bound should be extracted from data.
1873 outlier_z_score : float or None, optional
1874 Threshold z-score of outlier detection.
1875 The default, None, means no outlier detection.
1876 include_exceptionals : bool, optional
1877 If the outlier detection is active this switch indicates,
1878 if values detected as exceptionally frequent shall be included
1879 nevertheless into the binning range. Default is True,
1880 which means exceptionally frequent values are included
1881 even if they are detected as outliers.
1885 n_bins, lower_bound, upper_bound : int, float, float
1886 A triple of found number of bins, lower bound and upper bound of the binning range.
1889 if stackbys
is not None:
1890 unique_stackbys = np.unique(stackbys)
1892 for value
in unique_stackbys:
1894 indices_for_value = np.isnan(stackbys)
1896 indices_for_value = stackbys == value
1898 stack_lower_bound, stack_upper_bound = \
1900 lower_bound=lower_bound,
1901 upper_bound=upper_bound,
1902 outlier_z_score=outlier_z_score,
1903 include_exceptionals=include_exceptionals)
1905 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1907 lower_bound = np.nanmin([lwb
for lwb, upb
in stack_ranges])
1908 upper_bound = np.nanmax([upb
for lwb, upb
in stack_ranges])
1912 lower_bound=lower_bound,
1913 upper_bound=upper_bound,
1914 outlier_z_score=outlier_z_score,
1915 include_exceptionals=include_exceptionals)
1920 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1921 rice_n_bins = int(statistics.rice_n_bin(n_data))
1922 n_bins = rice_n_bins
1925 n_bins = int(n_bins)
1928 message =
'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % n_bins
1929 raise ValueError(message)
1931 return n_bins, lower_bound, upper_bound
1937 outlier_z_score=None,
1938 include_exceptionals=True):
1942 xs : numpy.ndarray (1d)
1943 Data point for which a binning should be found.
1944 lower_bound : float or None, optional
1945 Preset lower bound of the binning range.
1946 The default, None, means the bound should be extracted from data.
1947 upper_bound : float or None, optional
1948 Preset upper bound of the binning range.
1949 The default, None, means the bound should be extracted from data.
1950 outlier_z_score : float or None, optional
1951 Threshold z-score of outlier detection.
1952 The default, None, means no outlier detection.
1953 include_exceptionals : bool, optional
1954 If the outlier detection is active this switch indicates,
1955 if values detected as exceptionally frequent shall be included
1956 nevertheless into the binning range. Default is True,
1957 which means exceptionally frequent values are included
1958 even if they are detected as outliers.
1962 lower_bound, upper_bound : float, float
1963 A pair of found lower bound and upper bound of series.
1965 debug = get_logger().debug
1967 finite_xs_indices = np.isfinite(xs)
1968 if np.any(finite_xs_indices):
1969 finite_xs = xs[finite_xs_indices]
1973 make_symmetric =
False
1974 exclude_outliers = outlier_z_score
is not None and (lower_bound
is None or upper_bound
is None)
1977 if include_exceptionals
or exclude_outliers:
1979 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1982 if exclude_outliers:
1983 if not np.all(exceptional_indices):
1988 x_mean, x_std = np.nan, np.nan
1990 make_symmetric = abs(x_mean) < x_std / 5.0
and lower_bound
is None and upper_bound
is None
1992 if include_exceptionals
and len(exceptional_xs) != 0:
1993 lower_exceptional_x = np.min(exceptional_xs)
1994 upper_exceptional_x = np.max(exceptional_xs)
1995 make_symmetric =
False
1997 lower_exceptional_x = np.nan
1998 upper_exceptional_x = np.nan
2001 if lower_bound
is None:
2003 lower_bound = np.min(finite_xs)
2007 if outlier_z_score
is not None:
2009 lower_outlier_bound = x_mean - outlier_z_score * x_std
2013 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2015 if np.any(indices_above_lower_outlier_bound):
2016 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2019 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2021 debug(
'Lower bound after outlier detection')
2022 debug(
'Lower bound %s', lower_bound)
2023 debug(
'Lower outlier bound %s', lower_outlier_bound)
2026 if upper_bound
is None:
2028 upper_bound = np.max(finite_xs)
2031 if outlier_z_score
is not None:
2033 upper_outlier_bound = x_mean + outlier_z_score * x_std
2037 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2039 if np.any(indices_below_upper_outlier_bound):
2040 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2043 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2045 debug(
'Upper bound after outlier detection')
2046 debug(
'Upper bound %s', upper_bound)
2047 debug(
'Upper outlier bound %s', upper_outlier_bound)
2049 if make_symmetric
and lower_bound < 0
and upper_bound > 0:
2050 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2051 abs_bound = max(abs(lower_bound), abs(upper_bound))
2052 lower_bound = -abs_bound
2053 upper_bound = abs_bound
2055 return lower_bound, upper_bound
2059 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2063 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2064 Something having a GetListOfFunctions method that should hold
2065 the combined fit and additional statistics function.
2068 cls.
set_tf1set_tf1(histogram, additional_stats_tf1)
2072 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2076 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2077 Something having a GetListOfFunctions method that should hold
2078 the combined fit and additional statistics function.
2082 cls.
set_tf1set_tf1(histogram, combined_tf1)
2086 """Set the attached TF1 of the histogram.
2090 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2091 Something having a GetListOfFunctions method that should hold
2092 the combined fit and additional statistics function.
2097 tf1.SetName(
"FitAndStats")
2098 histogram.GetListOfFunctions().Add(tf1)
2102 """Delete the attached TF1 from the histogram
2106 histogram : ROOT.TH1 or ROOT.TGraph
2107 Something having a GetListOfFunctions method that holds the fit function
2109 tf1 = histogram.FindObject(
"FitAndStats")
2111 function_list = histogram.GetListOfFunctions()
2112 function_list.Remove(tf1)
2116 """Create a TF1 with the additional statistics from the histogram as parameters.
2120 histogram : ROOT.TH1 or ROOT.TGraph
2121 Something having a GetListOfFunctions method that holds the additional statistics.
2126 Function with the additional statistics as parameters.
2130 if not additional_stats:
2140 formula_string =
'+'.join(
'0*[' + str(i) +
']' for i
in range(len(additional_stats)))
2143 additional_stats_tf1 = ROOT.TF1(
"Stats", formula_string, lower_bound, upper_bound)
2145 for (i, (label, value))
in enumerate(additional_stats.items()):
2149 label = label.replace(
" ",
"-")
2150 additional_stats_tf1.SetParName(i, label)
2151 additional_stats_tf1.FixParameter(i, value)
2153 return additional_stats_tf1
2157 """Combine the fit function and the function carrying the additional statistics to one function.
2163 additional_stats_tf1 : ROOT.TF1
2164 The function carrying the additional statistics as parameters
2170 if additional_stats_tf1
is None:
2176 lower_bound = ctypes.c_double()
2177 upper_bound = ctypes.c_double()
2178 fit_tf1.GetRange(lower_bound, upper_bound)
2179 title = fit_tf1.GetTitle()
2181 combined_formula = additional_stats_tf1.GetExpFormula().Data() +
'+' + fit_tf1.GetExpFormula().Data()
2182 combined_tf1 = ROOT.TF1(
"Combined", combined_formula, lower_bound.value, upper_bound.value)
2183 combined_tf1.SetTitle(title)
2186 chi2 = fit_tf1.GetChisquare()
2187 combined_tf1.SetChisquare(chi2)
2189 ndf = fit_tf1.GetNDF()
2190 combined_tf1.SetNDF(ndf)
2192 n_stats_parameters = additional_stats_tf1.GetNpar()
2195 cls.
copy_tf1_parameterscopy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2201 """Copy the parameters of one TF1 to another.
2205 tf1_source : ROOT.TF1
2206 Take parameters from here
2207 tf1_target : ROOT.TF1
2209 offset : int, optional
2210 Index of the first target parameter to which to copy.
2212 n_parameters = tf1_source.GetNpar()
2215 lower_bound = ctypes.c_double()
2216 upper_bound = ctypes.c_double()
2218 for i_source
in range(n_parameters):
2219 parameter_name = tf1_source.GetParName(i_source)
2220 i_target = tf1_target.GetParNumber(parameter_name)
2224 for i_target
in range(tf1_target.GetNpar()):
2225 if parameter_name == tf1_target.GetParName(i_target):
2231 tf1_target.SetParameter(i_target,
2232 tf1_source.GetParameter(i_source))
2233 tf1_target.SetParError(i_target,
2234 tf1_source.GetParError(i_source))
2236 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2237 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2240 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2257 """Sets the maximum of the vertical plottable range"""
2259 if isinstance(histogram, ROOT.TH1):
2260 histogram.SetMaximum(histogram.GetMaximum(maximum))
2262 histogram.SetMaximum(maximum)
2265 """Sets the minimum of the vertical plottable range"""
2267 if isinstance(histogram, ROOT.TH1):
2268 histogram.SetMinimum(histogram.GetMinimum(minimum))
2270 histogram.SetMinimum(minimum)
2274 """Set the style such that the additional stats entries are shown by the TBrowser"""
2275 belle2_validation_style_name =
"belle2_validation_style"
2276 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2277 if not belle2_validation_tstyle:
2278 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2281 belle2_validation_tstyle.SetOptFit(opt_fit)
2284 belle2_validation_tstyle.SetOptStat(opt_stat)
2285 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2290 belle2_validation_tstyle.cd()
2294 """Simple test method"""
2295 ValidationPlot.set_tstyle()
2298 normal_distributed_values = np.random.randn(1000)
2301 normal_distributed_values[i] = np.nan
2303 for i
in range(10, 20):
2304 normal_distributed_values[i] = np.inf
2306 for i
in range(20, 30):
2307 normal_distributed_values[i] = -np.inf
2310 validation_histogram.hist(normal_distributed_values)
2311 validation_histogram.title =
'A normal distribution'
2312 validation_histogram.xlabel =
'normal'
2313 validation_histogram.ylabel =
'frequency'
2314 validation_histogram.fit_gaus()
2318 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2319 cumulated_histogram.title =
'A cumulated normal distribution'
2320 cumulated_histogram.xlabel =
'normal'
2321 cumulated_histogram.ylabel =
'cdf'
2322 cumulated_histogram.show()
2326 stackby = np.random.binomial(1.0, 0.40, 1000)
2327 stacked_validation_histogram =
ValidationPlot(
'test_stacked_hist')
2328 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2331 x = np.random.randn(1000)
2332 y = 3 * np.random.randn(1000)
2333 ones = np.ones_like(x)
2336 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2337 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2339 stacked_validation_scatter =
ValidationPlot(
'test_stacked_scatter')
2340 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2343 stacked_validation_profile =
ValidationPlot(
'test_stacked_profile')
2344 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2347 stacked_validation_hist2d =
ValidationPlot(
'test_stacked_hist2d')
2348 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2351 x = np.linspace(-1, 1, 1000)
2355 diagonal_plot.profile(x, y, bins=50)
2356 diagonal_plot.fit_line()
2360 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2362 tfile = ROOT.TFile(
'test.root',
'RECREATE')
2364 validation_histogram.write(tfile)
2366 with root_cd(
"expert")
as tdirectory1:
2367 diagonal_plot.write(tdirectory1)
2368 cumulated_profile.write(tdirectory1)
2369 cumulated_histogram.write(tdirectory1)
2371 with root_cd(
"stacked")
as tdirectory2:
2372 stacked_validation_histogram.write(tdirectory2)
2373 stacked_validation_scatter.write()
2374 stacked_validation_profile.write()
2375 stacked_validation_hist2d.write()
2379 tfile = ROOT.TFile(
'test.root')
2380 tBrowser = ROOT.TBrowser()
2381 tBrowser.BrowseObject(tfile)
2386 if __name__ ==
'__main__':
title
cached value of the title for this plot
def format_bin_label(value)
pvalue_warn
custom levels for pvalue warnings
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
y_log
Indicator whether the y axes should be displayed as a log scale.
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
def create_additional_stats_tf1(cls, histogram)
description
description of the plot
contact
contact information for this plot
upper_bound
upper right corner of the histogram
def write(self, tdirectory=None)
def fit_gaus(self, z_score=None)
ylabel
default label for the histogram's Y axis
int very_sparse_dots_line_style_index
An index that refers to a dot spacing such that the line is almost invisible for scatter.
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True, is_asymmetry=False)
def unpack_2d_param(param)
def is_discrete(xs, max_n_unique=None)
_contact
Contact email address for display on the validation page.
pvalue_error
custom levels for pvalue errors
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
def delete_tf1(cls, histogram)
_title
Title of the validation plot.
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
def description(self, description)
def __init__(self, name, referenceFileName=None)
def add_stats_entry(cls, histogram, label, value)
def contact(self, contact)
_xlabel
X axes label of the validation plot.
xlabel
cached value of the x-axis label for this plot
def set_tf1(cls, histogram, tf1)
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
def set_maximum(self, maximum)
def attach_attributes(self)
def add_nan_inf_stats(cls, histogram, name, xs)
def set_color(self, tobject, root_i_color)
def get_robust_mean_and_std(xs)
def cumulate(cls, histogram, cumulation_direction=None)
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
_is_expert
per default all plots are expert and must be set to non-expert explicitly
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
_description
Description of the plot purpose for display on the validation page.
histograms
A list of the histograms that make up the plot.
def get_additional_stats(cls, histogram)
_check
Detailed check instructions for display on the validation page.
name
A unique name to be used as the name of the ROOT object to be generated.
def set_fit_tf1(cls, histogram, fit_tf1)
def set_minimum(self, minimum)
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
plot
The main plot object, may contain one or more (in case of stacked plots) histograms.
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
def set_additional_stats_tf1(cls, histogram)
def scatter(self, xs, ys, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
_ylabel
Y axes label of the validation plot.
referenceFileName
name of the reference file, if not None the binning will be read from there
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
lower_bound
lower left corner of the histogram
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
def gaus_slice_fit(cls, th2, name, z_score=None)
check
cached value of the user-check action for this plot
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
def get_exceptional_values(xs)
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
def fill_into_tgraph(self, graph, xs, ys, filter=None)
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)