31 """Getter for the logging.Logger instance of this module
36 Logger instance of this module
38 return logging.getLogger(__name__)
42 units_by_quantity_name = {
67 def get_unit(quantity_name):
68 """Infers the unit of a quantity from its name.
70 Assumes the standard Belle II unit system.
72 Currently looks up the quantity string from units_by_quantity_name.
77 Name of a quantity (E.g. pt, x, ...)
84 unit = units_by_quantity_name.get(quantity_name,
None)
88 def compose_axis_label(quantity_name, unit=None):
89 """Formats a quantity name and a unit to a label for a plot axis.
91 If the unit is not given, an attempt is made to infer it
92 from the quantity name by the get_unit function.
97 Name of the quantity to be displayed at the axes
99 The unit of the quantity. Defaults to get_unit(quantity_name)
107 unit = get_unit(quantity_name)
110 axis_label = quantity_name
112 axis_label =
'%s (%s)' % (quantity_name, unit)
117 def get1DBinningFromReference(name, refFileName):
118 """ returns nbins, lowerbound, upperbound for TH1 / TProfile with name "name" found in the file "refFileName"
120 @param name : name of the TH1 object to be looked for in the file
121 @param refFileName : name of the reference file where the object is searched for
123 @return int nbin, float xmin, float xmax of the TH1
130 if refFileName
is None or refFileName ==
"":
131 return nbins, x_min, x_max
134 oldDirectory = ROOT.gROOT.CurrentDirectory().load()
136 tfile = ROOT.TFile(refFileName)
138 objptr = tfile.Get(name)
139 if objptr
and objptr.InheritsFrom(
"TH1"):
140 nbins = objptr.GetNbinsX()
141 x_min = objptr.GetXaxis().GetXmin()
142 x_max = objptr.GetXaxis().GetXmax()
144 basf2.B2WARNING(
'Requested object with name: ' + name +
' not found in file: ' + refFileName +
" (or not a TH1)")
146 basf2.B2WARNING(
'Requested file: ' + refFileName +
' could not be opened')
153 return nbins, x_min, x_max
157 StatsEntry = ROOT.TParameter(float)
162 """Class for generating a validation plot for the Belle II validation page.
164 Typically it generates plots from values stored in numpy arrays and feeds them into
165 plot ROOT classes for storing them.
167 It implements an automatic binning procedure based on the rice rule and
168 robust z score outlier detection.
170 It also keeps track of additional statistics typically neglected by ROOT such as a count
171 for the non-finite values such as NaN, +Inf, -Inf.
173 The special attributes for the Belle II validation page like
178 are exposed as properties of this class.
182 very_sparse_dots_line_style_index = 28
186 """Constructor of the ValidationPlot
191 A unique name to be used as the name of the ROOT object to be generated
193 referenceFileName : str
194 name of a reference file. If set the code will try to get the histogram or profile
195 from that file and determine the number of bins and upper and lower bound
196 (so far only implemented for 1D (TH1, TProfile), is ignored for 2D plots)
200 self.
namename = root_save_name(name)
248 outlier_z_score=None,
249 include_exceptionals=True,
250 allow_discrete=False,
251 cumulation_direction=None,
253 """Fill the plot with a one dimensional histogram."""
258 if n
is not None and xmin
is not None and xmax
is not None:
263 th1_factory = ROOT.TH1D
271 lower_bound=lower_bound,
272 upper_bound=upper_bound,
273 outlier_z_score=outlier_z_score,
274 include_exceptionals=include_exceptionals,
275 allow_discrete=allow_discrete,
276 cumulation_direction=cumulation_direction)
294 outlier_z_score=None,
295 include_exceptionals=True,
296 allow_discrete=False,
297 cumulation_direction=None,
300 """Fill the plot with a one dimensional profile of one variable over another."""
308 if n
is not None and xmin
is not None and xmax
is not None:
313 th1_factory = ROOT.TProfile
315 if gaus_z_score
is None:
322 lower_bound=lower_bound,
323 upper_bound=upper_bound,
324 outlier_z_score=outlier_z_score,
325 include_exceptionals=include_exceptionals,
326 allow_discrete=allow_discrete,
327 cumulation_direction=cumulation_direction)
331 self.
hist2dhist2d(xs, ys=ys, weights=weights, stackby=stackby,
333 lower_bound=(lower_bound,
None),
334 upper_bound=(upper_bound,
None),
335 outlier_z_score=(outlier_z_score, outlier_z_score),
336 include_exceptionals=(include_exceptionals,
True),
337 allow_discrete=(allow_discrete,
False),
344 name=histogram.GetName()[1:],
345 z_score=gaus_z_score)
346 profiles.append(profile)
348 self.
plotplot = self.
create_stackcreate_stack(profiles, name=self.
plotplot.GetName()[1:], reverse_stack=
False)
351 self.
y_logy_log =
True
353 if y_binary
or self.
is_binaryis_binary(ys):
358 histogram.SetMinimum(0)
359 histogram.SetMaximum(1.05)
361 self.
plotplot.SetMinimum(0)
362 self.
plotplot.SetMaximum(1.05)
370 lower_bound=(
None,
None),
371 upper_bound=(
None,
None),
372 outlier_z_score=(
None,
None),
373 include_exceptionals=(
True,
True),
376 """Fill the plot with a (unbinned) two dimensional scatter plot"""
380 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
381 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
382 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
383 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
387 lower_bound=x_lower_bound,
388 upper_bound=x_upper_bound,
389 outlier_z_score=x_outlier_z_score,
390 include_exceptionals=x_include_exceptionals
395 lower_bound=y_lower_bound,
396 upper_bound=y_upper_bound,
397 outlier_z_score=y_outlier_z_score,
398 include_exceptionals=y_include_exceptionals
401 graph = ROOT.TGraph()
403 graph.SetName(self.
namename)
404 graph.SetMarkerStyle(6)
405 graph.GetHistogram().SetOption(
"AP")
410 graph.SetLineColorAlpha(color_index, 0)
414 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
415 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
429 lower_bound=(
None,
None),
430 upper_bound=(
None,
None),
431 outlier_z_score=(
None,
None),
432 include_exceptionals=(
True,
True),
435 """Fill the plot with a (unbinned) two dimensional scatter plot
436 xs_and_err and ys_and_err are tuples containing the values and the errors on these values
445 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
446 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
447 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
448 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
452 lower_bound=x_lower_bound,
453 upper_bound=x_upper_bound,
454 outlier_z_score=x_outlier_z_score,
455 include_exceptionals=x_include_exceptionals
460 lower_bound=y_lower_bound,
461 upper_bound=y_upper_bound,
462 outlier_z_score=y_outlier_z_score,
463 include_exceptionals=y_include_exceptionals
466 graph = ROOT.TGraphErrors()
468 graph.SetName(self.
namename)
469 graph.GetHistogram().SetOption(
"A")
471 graph.SetMarkerColor(4)
472 graph.SetMarkerStyle(21)
475 graph.GetXaxis().SetLimits(x_lower_bound, x_upper_bound)
476 graph.GetYaxis().SetLimits(y_lower_bound, y_upper_bound)
492 lower_bound=(
None,
None),
493 upper_bound=(
None,
None),
494 outlier_z_score=(
None,
None),
495 include_exceptionals=(
True,
True),
496 allow_discrete=(
False,
False),
499 """Fill the plot with a two dimensional histogram"""
503 if quantiles
is not None:
504 name =
"_" + self.
namename
507 x_lower_bound, y_lower_bound = self.
unpack_2d_paramunpack_2d_param(lower_bound)
508 x_upper_bound, y_upper_bound = self.
unpack_2d_paramunpack_2d_param(upper_bound)
509 x_outlier_z_score, y_outlier_z_score = self.
unpack_2d_paramunpack_2d_param(outlier_z_score)
510 x_include_exceptionals, y_include_exceptionals = self.
unpack_2d_paramunpack_2d_param(include_exceptionals)
511 x_allow_discrete, y_allow_discrete = self.
unpack_2d_paramunpack_2d_param(allow_discrete)
513 if quantiles
is not None:
514 y_include_exceptionals =
True
515 y_allow_discrete =
False
520 lower_bound=x_lower_bound,
521 upper_bound=x_upper_bound,
522 outlier_z_score=x_outlier_z_score,
523 include_exceptionals=x_include_exceptionals,
524 allow_discrete=x_allow_discrete)
529 lower_bound=y_lower_bound,
530 upper_bound=y_upper_bound,
531 outlier_z_score=y_outlier_z_score,
532 include_exceptionals=y_include_exceptionals,
533 allow_discrete=y_allow_discrete)
535 n_x_bins = len(x_bin_edges) - 1
536 n_y_bins = len(y_bin_edges) - 1
542 histogram = ROOT.TH2D(name,
550 get_logger().info(
"Scatter plot %s is discrete in x.", name)
551 x_taxis = histogram.GetXaxis()
552 for i_x_bin, x_bin_label
in enumerate(x_bin_labels):
553 x_taxis.SetBinLabel(i_x_bin + 1, x_bin_label)
557 x_bin_width = x_bin_edges[1] - x_bin_edges[0]
561 get_logger().info(
"Scatter plot %s is discrete in y.", name)
562 y_taxis = histogram.GetYaxis()
563 for i_y_bin, y_bin_label
in enumerate(y_bin_labels):
564 y_taxis.SetBinLabel(i_y_bin + 1, y_bin_label)
568 y_bin_width = y_bin_edges[1] - y_bin_edges[0]
571 self.
createcreate(histogram, xs, ys=ys, weights=weights, stackby=stackby)
573 if quantiles
is not None:
577 for quantile
in quantiles:
578 profile = histogram.QuantilesX(quantile, histogram.GetName()[1:] +
'_' + str(quantile))
581 x_taxis = histogram.GetXaxis()
582 new_x_taxis = profile.GetXaxis()
583 for i_bin
in range(x_taxis.GetNbins() + 2):
584 label = x_taxis.GetBinLabel(i_bin)
586 new_x_taxis.SetBinLabel(i_bin, label)
589 epsilon = sys.float_info.epsilon
590 for i_bin
in range(0, profile.GetNbinsX() + 2):
591 profile.SetBinError(i_bin, epsilon)
593 profiles.append(profile)
596 self.
plotplot = self.
create_stackcreate_stack(profiles, name=self.
plotplot.GetName()[1:], reverse_stack=
False, force_graph=
True)
601 x_taxis = histogram.GetXaxis()
602 x_bin_edges = array.array(
"d", list(range(len(x_bin_labels) + 1)))
603 x_taxis.Set(n_x_bins, x_bin_edges)
608 x_taxis = histogram.GetXaxis()
609 y_bin_edges = array.array(
"d", list(range(len(y_bin_labels) + 1)))
610 y_taxis.Set(n_y_bins, y_bin_edges)
615 """Fit a Gaussian bell curve to the central portion of a one dimensional histogram
617 The fit is applied to the central mean +- z_score * std interval of the histogram,
618 such that it is less influenced by non-Gaussian tails further away than the given z score.
620 @param float z_score number of sigmas to include from the mean value of the histogram.
627 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
629 if not isinstance(plot, ROOT.TH1D):
630 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
634 fit_tf1 = ROOT.TF1(
"Fit", formula)
635 fit_tf1.SetTitle(title)
636 fit_tf1.SetParName(0,
"n")
637 fit_tf1.SetParName(1,
"mean")
638 fit_tf1.SetParName(2,
"std")
640 n = histogram.GetSumOfWeights()
641 mean = histogram.GetMean()
642 std = histogram.GetStdDev()
644 fit_tf1.SetParameter(0, n)
645 fit_tf1.SetParameter(1, mean)
646 fit_tf1.SetParameter(2, std)
649 return self.
fitfit(fit_tf1,
654 """Fit a general line to a one dimensional histogram"""
657 fit_tf1 = ROOT.TF1(
"Fit", formula)
658 fit_tf1.SetTitle(title)
659 fit_tf1.SetParName(0,
"slope")
660 fit_tf1.SetParName(1,
"intercept")
661 self.
fitfit(fit_tf1,
'M')
664 """Fit a constant function to a one dimensional histogram"""
667 fit_tf1 = ROOT.TF1(
"Fit", formula)
668 fit_tf1.SetTitle(title)
669 fit_tf1.SetParName(0,
"intercept")
670 self.
fitfit(fit_tf1,
'M')
673 """Fit a diagonal line through the origin to a one dimensional histogram"""
676 fit_tf1 = ROOT.TF1(
"Fit", formula)
677 fit_tf1.SetTitle(title)
678 fit_tf1.SetParName(0,
"slope")
679 self.
fitfit(fit_tf1,
'M')
681 def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None):
682 """Fit a user defined function to a one dimensional histogram
687 Formula string or TF1 to be fitted. See the TF1 constructors for what is a valid formula
689 Options string to be used in the fit. See TH1::Fit()
691 Lower bound of the range to be fitted
693 Upper bound of the range to be fitted
697 raise RuntimeError(
'Validation plot must be filled before it can be fitted.')
699 if not isinstance(plot, ROOT.TH1D):
700 raise RuntimeError(
'Fitting is currently implemented / tested for one dimensional, non stacked validation plots.')
704 xaxis = histogram.GetXaxis()
705 n_bins = xaxis.GetNbins()
706 hist_lower_bound = xaxis.GetBinLowEdge(1)
707 hist_upper_bound = xaxis.GetBinUpEdge(n_bins)
709 if z_score
is not None:
710 mean = histogram.GetMean()
711 std = histogram.GetStdDev()
713 if lower_bound
is None:
714 lower_bound = mean - z_score * std
716 if upper_bound
is None:
717 upper_bound = mean + z_score * std
720 if isinstance(formula, ROOT.TF1):
722 fit_tf1.SetRange(hist_lower_bound, hist_upper_bound)
724 fit_tf1 = ROOT.TF1(
"Fit",
728 get_logger().info(
'Fitting with %s', fit_tf1.GetExpFormula())
731 if lower_bound
is None or lower_bound < hist_lower_bound:
732 lower_bound = hist_lower_bound
733 if upper_bound
is None or upper_bound > hist_upper_bound:
734 upper_bound = hist_upper_bound
738 if 'N' not in options:
741 fit_res = histogram.Fit(fit_tf1, options +
"S",
"", lower_bound, upper_bound)
751 raise ValueError(
"Can not show a validation plot that has not been filled.")
754 """Write the plot to file
758 tdirectory : ROOT.TDirectory, optional
759 ROOT directory to which the plot should be written.
760 If omitted write to the current directory
762 if not self.
plotplot:
763 raise ValueError(
"Can not write a validation plot that has not been filled.")
765 with root_cd(tdirectory):
766 ValidationPlot.set_tstyle()
768 self.
plotplot.Write()
771 meta_options = [
"nostats"]
775 meta_options.append(
"expert")
777 meta_options.append(
"shifter")
781 meta_options.append(
"pvalue-error={}".format(self.
pvalue_errorpvalue_error))
783 meta_options.append(
"pvalue-warn={}".format(self.
pvalue_warnpvalue_warn))
787 meta_options.append(
"logy")
789 meta_options_str =
",".join(meta_options)
792 histogram.GetListOfFunctions().Add(ROOT.TNamed(
'MetaOptions', meta_options_str))
797 """Getter method for whether a plot is marked as an expert plot"""
802 """Getter for the plot title"""
807 """Setter for the plot title"""
810 self.
plotplot.SetTitle(title)
812 histogram.SetTitle(title)
816 """Getter for the axis label at the x axis"""
821 """Setter for the axis label at the x axis"""
824 histogram.GetXaxis().SetTitle(xlabel)
828 """Getter for the axis label at the y axis"""
833 """Setter for the axis label at the y axis"""
836 histogram.GetYaxis().SetTitle(ylabel)
840 """Getter for the contact email address to be displayed on the validation page"""
845 """Setter for the contact email address to be displayed on the validation page"""
848 found_obj = histogram.FindObject(
'Contact')
850 tnamed = ROOT.TNamed(
"Contact", contact)
851 histogram.GetListOfFunctions().Add(tnamed)
852 found_obj = histogram.FindObject(
'Contact')
853 found_obj.SetTitle(contact)
857 """Getter for the description to be displayed on the validation page"""
862 """Setter for the description to be displayed on the validation page"""
865 found_obj = histogram.FindObject(
'Description')
867 tnamed = ROOT.TNamed(
"Description", description)
868 histogram.GetListOfFunctions().Add(tnamed)
869 found_obj = histogram.FindObject(
'Description')
870 found_obj.SetTitle(description)
874 """Getter for the check to be displayed on the validation page"""
879 """Setter for the check to be displayed on the validation page"""
882 found_obj = histogram.FindObject(
'Check')
884 tnamed = ROOT.TNamed(
"Check", check)
885 histogram.GetListOfFunctions().Add(tnamed)
886 found_obj = histogram.FindObject(
'Check')
887 found_obj.SetTitle(check)
894 """Unpacks a function parameter for the two dimensional plots.
896 If it is a pair the first parameter shall apply to the x coordinate
897 the second to the y coordinate. In this case the pair is returned as two values
899 If something else is given, it is assumed that this parameter should apply equally
900 to both coordinates. In this case the same value is returned twice as a pair.
904 param : pair or single value
905 Function parameter for a two dimensional plot
910 A pair of values being the parameter for the x coordinate and
911 the y coordinate respectively
915 x_param, y_param = param
919 return x_param, y_param
923 """Determine if the data consists of boolean values"""
924 return statistics.is_binary_series(xs)
928 """Determine if the data consists of discrete values"""
929 return statistics.is_discrete_series(xs, max_n_unique=max_n_unique)
933 """Find exceptionally frequent values
943 A list of the found exceptional values.
945 return statistics.rice_exceptional_values(xs)
949 """Does an estimation of mean and standard deviation robust against outliers.
959 Pair of mean and standard deviation
961 x_mean = statistics.truncated_mean(xs)
962 x_std = statistics.trimmed_std(xs)
967 """Formats a value to be placed at a tick on an axis."""
968 if np.isfinite(value)
and value == np.round(value):
969 return str(int(value))
971 formated_value =
"{:.5g}".format(value)
974 if len(formated_value) > 8:
975 formated_value =
"{:.3e}".format(value)
976 return formated_value
987 outlier_z_score=None,
988 include_exceptionals=True,
989 allow_discrete=False,
990 cumulation_direction=None):
991 """Combined factory method for creating a one dimensional histogram or a profile plot."""
995 xs = np.array(xs, copy=
False)
998 ys = np.array(ys, copy=
False)
1000 if weights
is not None:
1001 weights = np.array(weights, copy=
False)
1006 lower_bound=lower_bound,
1007 upper_bound=upper_bound,
1008 outlier_z_score=outlier_z_score,
1009 include_exceptionals=include_exceptionals,
1010 allow_discrete=allow_discrete)
1012 n_bins = len(bin_edges) - 1
1015 histogram = th1_factory(name,
'', n_bins, bin_edges)
1018 get_logger().info(
"One dimensional plot %s is discrete in x.", name)
1019 x_taxis = histogram.GetXaxis()
1020 for i_bin, bin_label
in enumerate(bin_labels):
1021 x_taxis.SetBinLabel(i_bin + 1, bin_label)
1025 bin_width = bin_edges[1] - bin_edges[0]
1028 self.
createcreate(histogram,
1033 cumulation_direction=cumulation_direction,
1040 x_taxis = histogram.GetXaxis()
1041 bin_edges = array.array(
"d", list(range(len(bin_labels) + 1)))
1042 x_taxis.Set(n_bins, bin_edges)
1050 cumulation_direction=None,
1051 reverse_stack=None):
1052 """Create histograms from a template, possibly stacked"""
1057 histogram = histogram_template
1058 self.
fill_intofill_into(histogram, xs, ys, weights=weights)
1059 if cumulation_direction
is not None:
1060 histogram = self.
cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1062 histograms.append(histogram)
1066 stackby = np.array(stackby, copy=
False)
1067 name = histogram_template.GetName()
1074 groupby_label=
"stack")
1076 if cumulation_direction
is not None:
1077 histograms = [self.
cumulatecumulate(histogram, cumulation_direction=cumulation_direction)
1078 for histogram
in histograms]
1080 plot = self.
create_stackcreate_stack(histograms, name=name +
"_stacked", reverse_stack=reverse_stack)
1083 self.
plotplot = plot
1088 """Create a stack of histograms"""
1089 if len(histograms) == 1:
1090 plot = histograms[0]
1092 if isinstance(histograms[0], (ROOT.TProfile, ROOT.TGraph))
or force_graph:
1093 plot = ROOT.TMultiGraph()
1095 plot = ROOT.THStack()
1102 for histogram
in reversed(histograms):
1103 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1105 plot.Add(histogram,
"APZ")
1109 for histogram
in histograms:
1110 if isinstance(histogram, ROOT.TProfile)
or (isinstance(histogram, ROOT.TH1)
and force_graph):
1112 plot.Add(histogram,
"APZ")
1120 """Extract errors from a TProfile histogram and create a TGraph from these"""
1121 if isinstance(tprofile, ROOT.TGraph):
1124 x_taxis = tprofile.GetXaxis()
1125 n_bins = x_taxis.GetNbins()
1128 bin_ids_without_underflow = list(range(1, n_bins + 1))
1130 bin_centers = np.array([x_taxis.GetBinCenter(i_bin)
for i_bin
in bin_ids_without_underflow])
1132 bin_centers = np.abs(bin_centers)
1133 bin_widths = np.array([x_taxis.GetBinWidth(i_bin)
for i_bin
in bin_ids_without_underflow])
1134 bin_x_errors = bin_widths / 2.0
1137 bin_contents = np.array([tprofile.GetBinContent(i_bin)
for i_bin
in bin_ids_without_underflow])
1138 bin_y_errors = np.array([tprofile.GetBinError(i_bin)
for i_bin
in bin_ids_without_underflow])
1140 tgrapherrors = ROOT.TGraphErrors(n_bins, bin_centers, bin_contents, bin_x_errors, bin_y_errors)
1142 tgrapherrors.GetHistogram().SetOption(
"APZ")
1144 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1145 tgrapherrors.SetLineColor(tprofile.GetLineColor())
1148 for tobject
in tprofile.GetListOfFunctions():
1149 tgrapherrors.GetListOfFunctions().Add(tobject.Clone())
1152 cls.
add_stats_entryadd_stats_entry(tgrapherrors,
'count', tprofile.GetEntries())
1154 stats_values = np.array([np.nan] * 6)
1155 tprofile.GetStats(stats_values)
1157 sum_w = stats_values[0]
1159 sum_wx = stats_values[2]
1160 sum_wx2 = stats_values[3]
1161 sum_wy = stats_values[4]
1162 sum_wy2 = stats_values[5]
1170 np.sqrt(sum_wx2 * sum_w - sum_wx * sum_wx) / sum_w)
1178 np.sqrt(sum_wy2 * sum_w - sum_wy * sum_wy) / sum_w)
1182 tgrapherrors.GetCovariance())
1186 tgrapherrors.GetCorrelationFactor())
1196 groupby_label="group"):
1197 """Fill data into similar histograms in groups indicated by a groupby array"""
1200 unique_groupbys = np.unique(groupbys)
1201 name = histogram_template.GetName()
1203 for i_value, value
in enumerate(unique_groupbys):
1205 indices_for_value = np.isnan(groupbys)
1207 indices_for_value = groupbys == value
1210 histogram_for_value = histogram_template.Clone(name +
'_' + str(value))
1211 i_root_color = i_value + 1
1213 self.
set_colorset_color(histogram_for_value, i_root_color)
1216 self.
add_stats_entryadd_stats_entry(histogram_for_value, groupby_label, value)
1218 self.
fill_intofill_into(histogram_for_value,
1222 filter=indices_for_value)
1224 histograms.append(histogram_for_value)
1229 """Set the color of the ROOT object.
1231 By default the line color of a TGraph should be invisible, so do not change it
1232 For other objects set the marker and the line color
1236 tobject : Plotable object inheriting from TAttLine and TAttMarker such as TGraph or TH1
1237 Object of which the color should be set.
1239 Color index of the ROOT color table
1241 if isinstance(tobject, ROOT.TGraph):
1242 tobject.SetMarkerColor(root_i_color)
1244 tobject.SetLineColor(root_i_color)
1245 tobject.SetMarkerColor(root_i_color)
1247 def fill_into(self, plot, xs, ys=None, weights=None, filter=None):
1248 """Fill the data into the plot object"""
1249 if isinstance(plot, ROOT.TGraph):
1251 raise ValueError(
"ys are required for filling a graph")
1253 elif isinstance(plot, ROOT.TGraphErrors):
1255 raise ValueError(
"ys are required for filling a graph error")
1259 self.
fill_into_th1fill_into_th1(plot, xs, ys, weights=weights, filter=filter)
1262 """fill point values and error of the x and y axis into the graph"""
1264 assert(len(xs[0]) == len(ys[0]))
1266 graph.Set(len(xs[0]))
1268 for i
in range(len(xs[0])):
1269 graph.SetPoint(i, xs[0][i], ys[0][i])
1270 graph.SetPointError(i, xs[1][i], ys[1][i])
1273 """Fill the data into a TGraph"""
1277 filter =
slice(
None)
1287 if x_n_data > max_n_data
or y_n_data > max_n_data:
1288 get_logger().warning(
"Number of points in scatter graph %s exceed limit %s" %
1289 (self.
namename, max_n_data))
1291 get_logger().warning(
"Cropping %s" % max_n_data)
1293 xs = xs[0:max_n_data]
1294 ys = ys[0:max_n_data]
1296 x_axis = graph.GetXaxis()
1297 y_axis = graph.GetYaxis()
1299 x_lower_bound = x_axis.GetXmin()
1300 x_upper_bound = x_axis.GetXmax()
1302 y_lower_bound = y_axis.GetXmin()
1303 y_upper_bound = y_axis.GetXmax()
1305 x_underflow_indices = xs < x_lower_bound
1306 x_overflow_indices = xs > x_upper_bound
1308 y_underflow_indices = ys < y_lower_bound
1309 y_overflow_indices = ys > y_upper_bound
1311 plot_indices = ~(np.isnan(xs) |
1312 x_underflow_indices |
1313 x_overflow_indices |
1315 y_underflow_indices |
1318 n_plot_data = np.sum(plot_indices)
1319 plot_xs = xs[plot_indices]
1320 plot_ys = ys[plot_indices]
1322 graph.Set(int(n_plot_data))
1323 for i, (x, y)
in enumerate(zip(plot_xs, plot_ys)):
1324 graph.SetPoint(i, x, y)
1326 self.
add_stats_entryadd_stats_entry(graph,
'count', np.sum(np.isfinite(xs)))
1331 x_n_underflow = np.sum(x_underflow_indices)
1333 self.
add_stats_entryadd_stats_entry(graph,
'x underf.', x_n_underflow)
1335 x_n_overflow = np.sum(x_overflow_indices)
1339 y_n_underflow = np.sum(y_underflow_indices)
1341 self.
add_stats_entryadd_stats_entry(graph,
'y underf.', y_n_underflow)
1343 y_n_overflow = np.sum(y_overflow_indices)
1353 self.
add_stats_entryadd_stats_entry(graph,
'cov', graph.GetCovariance())
1354 self.
add_stats_entryadd_stats_entry(graph,
'corr', graph.GetCorrelationFactor())
1357 """Fill the histogram blocking non finite values
1361 histogram : ROOT.TH1
1362 The histogram to be filled
1363 xs : numpy.ndarray (1d)
1364 Data for the first axes
1365 ys : numpy.ndarray (1d), optional
1366 Data for the second axes
1367 weights : numpy.ndarray (1d), optional
1368 Weight of the individual points. Defaults to one for each
1369 filter : numpy.ndarray, optional
1370 Boolean index array indicating which entries shall be taken.
1374 filter =
slice(
None)
1379 finite_filter = np.isfinite(xs)
1385 finite_filter &= np.isfinite(ys)
1388 xs = xs[finite_filter]
1389 weights = np.ones_like(xs)
1391 weights = weights[filter]
1393 finite_filter &= np.isfinite(weights)
1394 xs = xs[finite_filter]
1395 weights[finite_filter]
1398 ys = ys[finite_filter]
1403 except AttributeError:
1404 Fill = histogram.Fill
1406 fill = np.frompyfunc(Fill, 2, 1)
1407 fill(xs.astype(np.float64, copy=
False),
1408 weights.astype(np.float64, copy=
False))
1410 fill = np.frompyfunc(Fill, 3, 1)
1411 fill(xs.astype(np.float64, copy=
False),
1412 ys.astype(np.float64, copy=
False),
1413 weights.astype(np.float64, copy=
False))
1417 xs = xs.astype(np.float64, copy=
False)
1418 weights = weights.astype(np.float64, copy=
False)
1421 histogram.FillN(n, xs, weights)
1423 basf2.B2WARNING(
"No values to be filled into histogram: " + self.
namename)
1427 xs = xs.astype(np.float64, copy=
False)
1428 ys = ys.astype(np.float64, copy=
False)
1429 weights = weights.astype(np.float64, copy=
False)
1432 histogram.FillN(n, xs, ys, weights)
1434 basf2.B2WARNING(
"No values to be filled into histogram: " + self.
namename)
1440 """ Extracts the counts of non finite floats from a series
1441 and adds them as additional statistics to the histogram.
1445 histogram : derived from ROOT.TH1 or ROOT.TGraph
1446 Something having a GetListOfFunctions method that
1448 A label for the data series to be prefixed to the entries.
1449 xs : numpy.ndarray (1d)
1450 Data from which the non-finite floats should be counted.
1452 n_nans = np.isnan(xs).sum()
1456 n_positive_inf = np.sum(xs == np.inf)
1457 if n_positive_inf > 0:
1458 cls.
add_stats_entryadd_stats_entry(histogram, name +
' pos inf', n_positive_inf)
1460 n_negative_inf = np.sum(xs == -np.inf)
1461 if n_negative_inf > 0:
1462 cls.
add_stats_entryadd_stats_entry(histogram, name +
' neg inf', n_negative_inf)
1466 """Add a new additional statistics to the histogram.
1470 histogram : derived from ROOT.TH1 or ROOT.TGraph
1471 Something having a GetListOfFunctions method that holds the additional statistics
1473 Label of the statistic
1475 Value of the statistic
1477 stats_entry = StatsEntry(str(label), float(value))
1478 histogram.GetListOfFunctions().Add(stats_entry)
1483 """Get the additional statistics from the histogram and return them as a dict.
1487 histogram : derived from ROOT.TH1 or ROOT.TGraph
1488 Something having a GetListOfFunctions method that holds the additional statistics
1492 collections.OrderedDict
1493 A map of labels to values for the additional statistics
1495 additional_stats = collections.OrderedDict()
1496 for tobject
in histogram.GetListOfFunctions():
1497 if isinstance(tobject, StatsEntry):
1498 stats_entry = tobject
1499 label = stats_entry.GetName()
1500 value = stats_entry.GetVal()
1501 additional_stats[label] = value
1502 return additional_stats
1506 """Extract a slice of a scatterplot and apply a Gaussian fit to it"""
1509 y_taxis = th2.GetYaxis()
1510 th2_lower_bound = y_taxis.GetXmin()
1511 th2_upper_bound = y_taxis.GetXmax()
1512 th2_height = y_taxis.GetXmax() - y_taxis.GetXmin()
1513 n_y_bins = y_taxis.GetNbins()
1515 y_mean = th2.GetMean(2)
1516 y_std = th2.GetStdDev(2)
1517 fit_lower_bound = max(th2_lower_bound, y_mean - z_score * y_std)
1518 fit_upper_bound = min(th2_upper_bound, y_mean + z_score * y_std)
1519 fit_height = fit_upper_bound - fit_lower_bound
1521 required_n_bins_inslice_filled = n_y_bins * fit_height / th2_height
1523 fit_lower_bound = th2_lower_bound
1524 fit_upper_bound = th2_upper_bound
1525 fit_height = fit_upper_bound - fit_lower_bound
1526 required_n_bins_inslice_filled = n_y_bins / 1.61
1529 required_n_bins_inslice_filled = min(required_n_bins_inslice_filled, n_y_bins / 1.61)
1531 fit_tf1 = ROOT.TF1(
"Fit",
"gaus", fit_lower_bound, fit_upper_bound)
1532 fit_tf1.SetParName(0,
"n")
1533 fit_tf1.SetParName(1,
"mean")
1534 fit_tf1.SetParName(2,
"std")
1538 param_fit_th1s = ROOT.TObjArray()
1539 th2.FitSlicesY(fit_tf1, i_first_bin, i_last_bin,
1540 int(required_n_bins_inslice_filled),
1541 fit_options, param_fit_th1s)
1543 th1_means = param_fit_th1s.At(1)
1544 th1_means.SetName(name)
1545 th1_means.SetTitle(th2.GetTitle())
1550 x_taxis = th2.GetXaxis()
1551 new_x_taxis = th1_means.GetXaxis()
1552 for i_bin
in range(x_taxis.GetNbins() + 2):
1553 label = x_taxis.GetBinLabel(i_bin)
1555 new_x_taxis.SetBinLabel(i_bin, label)
1558 data_lower_bound = th1_means.GetMinimum(fit_lower_bound)
1559 data_upper_bound = th1_means.GetMaximum(fit_upper_bound)
1560 data_height = data_upper_bound - data_lower_bound
1562 plot_lower_bound = max(fit_lower_bound, data_lower_bound - 0.05 * data_height)
1563 plot_upper_bound = min(fit_upper_bound, data_upper_bound + 0.05 * data_height)
1565 th1_means.SetMinimum(plot_lower_bound)
1566 th1_means.SetMaximum(plot_upper_bound)
1571 def cumulate(cls, histogram, cumulation_direction=None):
1572 """Cumulates the histogram inplace.
1576 histogram : ROOT.TH1 or ROOT.TProfile
1577 Filled histogram to be cumulated
1578 cumulation_direction : int, optional
1579 Direction is indicated by the sign.
1580 Positive means from left to right, negative means from right to left.
1581 If no cumulation direction is given, return the histogram as is.
1586 Cumulated histogram potentially altered inplace.
1588 if not cumulation_direction:
1591 cumulate_backward = cumulation_direction < 0
1592 cumulate_forward =
not cumulate_backward
1594 if isinstance(histogram, ROOT.TH2):
1595 raise ValueError(
"Cannot cumulate a two dimensional histogram.")
1597 if isinstance(histogram, ROOT.TH3):
1598 raise ValueError(
"Cannot cumulate a three dimensional histogram.")
1600 if not isinstance(histogram, ROOT.TH1):
1601 raise ValueError(
"Can only cumulate a one dimensional histogram.")
1603 if isinstance(histogram, ROOT.TProfile):
1604 tprofile = histogram
1607 tgraph.SetName(tprofile.GetName())
1609 n_bins = histogram.GetNbinsX()
1611 cumulated_content = 0.0
1612 cumulated_entries = 0
1616 i_bins = list(range(0, n_bins + 2))
1617 if not cumulate_forward:
1618 i_bins = reversed(i_bins)
1620 for i_bin
in i_bins:
1622 bin_content = tprofile.GetBinContent(i_bin)
1623 bin_entries = tprofile.GetBinEffectiveEntries(i_bin)
1624 bin_std = tprofile.GetBinError(i_bin)
1626 if bin_entries != 0:
1627 cumulated_content = (
1628 1.0 * (cumulated_entries * cumulated_content + bin_entries * bin_content) /
1629 (cumulated_entries + bin_entries)
1633 math.hypot(cumulated_entries * cumulated_std, bin_entries * bin_std) /
1634 (cumulated_entries + bin_entries)
1637 cumulated_entries = cumulated_entries + bin_entries
1642 if i_point >= 0
and i_point < n_points:
1643 x = tgraph.GetX()[i_point]
1647 tgraph.SetPoint(i_point, x, cumulated_content)
1649 x_error = tgraph.GetErrorX(i_point)
1650 tgraph.SetPointError(i_point, x_error, cumulated_std)
1655 n_bins = histogram.GetNbinsX()
1656 cumulated_content = 0.0
1658 i_bins = list(range(0, n_bins + 2))
1659 if not cumulate_forward:
1660 i_bins = reversed(i_bins)
1662 for i_bin
in i_bins:
1663 bin_content = histogram.GetBinContent(i_bin)
1664 cumulated_content += bin_content
1665 histogram.SetBinContent(i_bin, cumulated_content)
1675 outlier_z_score=None,
1676 include_exceptionals=True,
1677 allow_discrete=False):
1678 """Deducing bin edges from a data series.
1682 xs : numpy.ndarray (1d)
1683 Data point for which a binning should be found.
1684 stackbys : numpy.ndarray (1d)
1685 Categories of the data points to be distinguishable
1686 bins : list(float) or int or None, optional
1687 Preset bin edges or preset number of desired bins.
1688 The default, None, means the bound should be extracted from data.
1689 The rice rule is used to determine the number of bins.
1690 If a list of floats is given return them immediately.
1691 lower_bound : float or None, optional
1692 Preset lower bound of the binning range.
1693 The default, None, means the bound should be extracted from data.
1694 upper_bound : float or None, optional
1695 Preset upper bound of the binning range.
1696 The default, None, means the bound should be extracted from data.
1697 outlier_z_score : float or None, optional
1698 Threshold z-score of outlier detection.
1699 The default, None, means no outlier detection.
1700 include_exceptionals : bool, optional
1701 If the outlier detection is active this switch indicates,
1702 if values detected as exceptionally frequent shall be included
1703 nevertheless into the binning range. Default is True,
1704 which means exceptionally frequent values are included
1705 even if they are detected as outliers.
1709 np.array (1d), list(str)
1710 Pair of bin edges and labels deduced from the series.
1711 Second element is None if the series is not detected as discrete.
1713 debug = get_logger().debug
1714 debug(
'Determine binning for plot named %s', self.
namename)
1720 elif isinstance(bins, collections.Iterable):
1724 bin_edges = array.array(
'd', bin_edges)
1726 return bin_edges, bin_labels
1737 message =
'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % bins
1738 raise ValueError(message)
1741 xs = np.array(xs, copy=
False)
1745 debug(
'Discrete binning values encountered')
1746 finite_xs = xs[np.isfinite(xs)]
1747 unique_xs = np.unique(finite_xs)
1750 if lower_bound
is None:
1751 if len(unique_xs) == 0:
1752 if upper_bound
is None:
1755 lower_bound = upper_bound - 1
1757 lower_bound = np.min(unique_xs)
1759 unique_xs = unique_xs[unique_xs >= lower_bound]
1761 if upper_bound
is None:
1762 if len(unique_xs) == 0:
1763 upper_bound = lower_bound + 1
1765 upper_bound = np.min(unique_xs)
1767 unique_xs = unique_xs[unique_xs <= upper_bound]
1770 n_bins = len(unique_xs)
or 1
1772 if len(unique_xs) > 0
and n_bins >= len(unique_xs):
1774 bin_edges = array.array(
'd', unique_xs)
1777 bin_edges.append(bin_edges[-1] + 1)
1778 return bin_edges, bin_labels
1787 debug(
'Lower bound %s', lower_bound)
1788 debug(
'Upper bound %s', upper_bound)
1789 debug(
'N bins %s', n_bins)
1795 lower_bound=lower_bound,
1796 upper_bound=upper_bound,
1797 outlier_z_score=outlier_z_score,
1798 include_exceptionals=include_exceptionals)
1800 n_bins, lower_bound, upper_bound = bin_range
1802 n_bin_edges = n_bins + 1
1803 if lower_bound != upper_bound:
1805 debug(
"Creating flat distribution binning")
1806 precentiles = np.linspace(0.0, 100.0, n_bin_edges)
1807 bin_edges = np.unique(np.nanpercentile(xs[(lower_bound <= xs) & (xs <= upper_bound)], precentiles))
1811 bin_edges = np.linspace(lower_bound, upper_bound, n_bin_edges)
1816 bin_edges[0] = lower_bound
1817 bin_edges[-1] = np.nextafter(upper_bound, np.inf)
1818 debug(
'Bins %s', bin_edges)
1822 bin_edges = [lower_bound, upper_bound + 1]
1825 bin_edges = array.array(
'd', bin_edges)
1826 debug(
'Bins %s for %s', bin_edges, self.
namename)
1827 return bin_edges,
None
1835 outlier_z_score=None,
1836 include_exceptionals=True):
1837 """Calculates the number of bins, the lower bound and the upper bound from a given data series
1838 estimating the values that are not given.
1840 If the outlier_z_score is given the method tries to exclude outliers that exceed a certain z-score.
1841 The z-score is calculated as (x - x_mean) / x_std. To be robust against outliers the necessary
1842 mean and std deviation are based on truncated mean and a trimmed std calculated from the inter
1843 quantile range (IQR).
1845 If additional include_exceptionals is true the method tries to find exceptional values in the series
1846 and always include them in the range if it finds any.
1847 Exceptional values means exact values that appear often in the series for whatever reason.
1848 Possible reasons include
1849 * Internal / default values
1850 * Failed evaluation conditions
1852 which should not be cropped away automatically if you are looking at the quality of your data.
1856 xs : numpy.ndarray (1d)
1857 Data point for which a binning should be found.
1858 stackbys : numpy.ndarray (1d)
1859 Categories of the data points to be distinguishable
1860 n_bins : int or None, optional
1861 Preset number of desired bins. The default, None, means the bound should be extracted from data.
1862 The rice rule is used to determine the number of bins.
1863 lower_bound : float or None, optional
1864 Preset lower bound of the binning range.
1865 The default, None, means the bound should be extracted from data.
1866 upper_bound : float or None, optional
1867 Preset upper bound of the binning range.
1868 The default, None, means the bound should be extracted from data.
1869 outlier_z_score : float or None, optional
1870 Threshold z-score of outlier detection.
1871 The default, None, means no outlier detection.
1872 include_exceptionals : bool, optional
1873 If the outlier detection is active this switch indicates,
1874 if values detected as exceptionally frequent shall be included
1875 nevertheless into the binning range. Default is True,
1876 which means exceptionally frequent values are included
1877 even if they are detected as outliers.
1881 n_bins, lower_bound, upper_bound : int, float, float
1882 A triple of found number of bins, lower bound and upper bound of the binning range.
1885 if stackbys
is not None:
1886 unique_stackbys = np.unique(stackbys)
1888 for value
in unique_stackbys:
1890 indices_for_value = np.isnan(stackbys)
1892 indices_for_value = stackbys == value
1894 stack_lower_bound, stack_upper_bound = \
1896 lower_bound=lower_bound,
1897 upper_bound=upper_bound,
1898 outlier_z_score=outlier_z_score,
1899 include_exceptionals=include_exceptionals)
1901 stack_ranges.append([stack_lower_bound, stack_upper_bound])
1903 lower_bound = np.nanmin([lwb
for lwb, upb
in stack_ranges])
1904 upper_bound = np.nanmax([upb
for lwb, upb
in stack_ranges])
1908 lower_bound=lower_bound,
1909 upper_bound=upper_bound,
1910 outlier_z_score=outlier_z_score,
1911 include_exceptionals=include_exceptionals)
1916 n_data = np.sum((lower_bound <= xs) & (xs <= upper_bound))
1917 rice_n_bins = int(statistics.rice_n_bin(n_data))
1918 n_bins = rice_n_bins
1921 n_bins = int(n_bins)
1924 message =
'Cannot accept n_bins=%s as number of bins, because it is not a number greater than 0.' % n_bins
1925 raise ValueError(message)
1927 return n_bins, lower_bound, upper_bound
1933 outlier_z_score=None,
1934 include_exceptionals=True):
1938 xs : numpy.ndarray (1d)
1939 Data point for which a binning should be found.
1940 lower_bound : float or None, optional
1941 Preset lower bound of the binning range.
1942 The default, None, means the bound should be extracted from data.
1943 upper_bound : float or None, optional
1944 Preset upper bound of the binning range.
1945 The default, None, means the bound should be extracted from data.
1946 outlier_z_score : float or None, optional
1947 Threshold z-score of outlier detection.
1948 The default, None, means no outlier detection.
1949 include_exceptionals : bool, optional
1950 If the outlier detection is active this switch indicates,
1951 if values detected as exceptionally frequent shall be included
1952 nevertheless into the binning range. Default is True,
1953 which means exceptionally frequent values are included
1954 even if they are detected as outliers.
1958 lower_bound, upper_bound : float, float
1959 A pair of found lower bound and upper bound of series.
1961 debug = get_logger().debug
1963 finite_xs_indices = np.isfinite(xs)
1964 if np.any(finite_xs_indices):
1965 finite_xs = xs[finite_xs_indices]
1969 make_symmetric =
False
1970 exclude_outliers = outlier_z_score
is not None and (lower_bound
is None or upper_bound
is None)
1973 if include_exceptionals
or exclude_outliers:
1975 exceptional_indices = np.in1d(finite_xs, exceptional_xs)
1978 if exclude_outliers:
1979 if not np.all(exceptional_indices):
1984 x_mean, x_std = np.nan, np.nan
1986 make_symmetric = abs(x_mean) < x_std / 5.0
and lower_bound
is None and upper_bound
is None
1988 if include_exceptionals
and len(exceptional_xs) != 0:
1989 lower_exceptional_x = np.min(exceptional_xs)
1990 upper_exceptional_x = np.max(exceptional_xs)
1991 make_symmetric =
False
1993 lower_exceptional_x = np.nan
1994 upper_exceptional_x = np.nan
1997 if lower_bound
is None:
1999 lower_bound = np.min(finite_xs)
2003 if outlier_z_score
is not None:
2005 lower_outlier_bound = x_mean - outlier_z_score * x_std
2009 indices_above_lower_outlier_bound = finite_xs >= lower_outlier_bound
2011 if np.any(indices_above_lower_outlier_bound):
2012 lower_bound = np.min(finite_xs[indices_above_lower_outlier_bound])
2015 lower_bound = np.nanmin([lower_bound, lower_exceptional_x])
2017 debug(
'Lower bound after outlier detection')
2018 debug(
'Lower bound %s', lower_bound)
2019 debug(
'Lower outlier bound %s', lower_outlier_bound)
2022 if upper_bound
is None:
2024 upper_bound = np.max(finite_xs)
2027 if outlier_z_score
is not None:
2029 upper_outlier_bound = x_mean + outlier_z_score * x_std
2033 indices_below_upper_outlier_bound = finite_xs <= upper_outlier_bound
2035 if np.any(indices_below_upper_outlier_bound):
2036 upper_bound = np.max(finite_xs[indices_below_upper_outlier_bound])
2039 upper_bound = np.nanmax([upper_bound, upper_exceptional_x])
2041 debug(
'Upper bound after outlier detection')
2042 debug(
'Upper bound %s', upper_bound)
2043 debug(
'Upper outlier bound %s', upper_outlier_bound)
2045 if make_symmetric
and lower_bound < 0
and upper_bound > 0:
2046 if abs(abs(lower_bound) - abs(upper_bound)) < x_std / 5.0:
2047 abs_bound = max(abs(lower_bound), abs(upper_bound))
2048 lower_bound = -abs_bound
2049 upper_bound = abs_bound
2051 return lower_bound, upper_bound
2055 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2059 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2060 Something having a GetListOfFunctions method that should hold
2061 the combined fit and additional statistics function.
2064 cls.
set_tf1set_tf1(histogram, additional_stats_tf1)
2068 """Combining fit TF1 with the additional statistics and attach them to the histogram.
2072 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2073 Something having a GetListOfFunctions method that should hold
2074 the combined fit and additional statistics function.
2078 cls.
set_tf1set_tf1(histogram, combined_tf1)
2082 """Set the attached TF1 of the histogram.
2086 histogram : ROOT.TH1 or ROOT.TGraph or ROOT.TMultiGraph
2087 Something having a GetListOfFunctions method that should hold
2088 the combined fit and additional statistics function.
2093 tf1.SetName(
"FitAndStats")
2094 histogram.GetListOfFunctions().Add(tf1)
2098 """Delete the attached TF1 from the histogram
2102 histogram : ROOT.TH1 or ROOT.TGraph
2103 Something having a GetListOfFunctions method that holds the fit function
2105 tf1 = histogram.FindObject(
"FitAndStats")
2107 function_list = histogram.GetListOfFunctions()
2108 function_list.Remove(tf1)
2112 """Create a TF1 with the additional statistics from the histogram as parameters.
2116 histogram : ROOT.TH1 or ROOT.TGraph
2117 Something having a GetListOfFunctions method that holds the additional statistics.
2122 Function with the additional statistics as parameters.
2126 if not additional_stats:
2136 formula_string =
'+'.join(
'0*[' + str(i) +
']' for i
in range(len(additional_stats)))
2139 additional_stats_tf1 = ROOT.TF1(
"Stats", formula_string, lower_bound, upper_bound)
2141 for (i, (label, value))
in enumerate(additional_stats.items()):
2145 label = label.replace(
" ",
"-")
2146 additional_stats_tf1.SetParName(i, label)
2147 additional_stats_tf1.FixParameter(i, value)
2149 return additional_stats_tf1
2153 """Combine the fit function and the function carrying the additional statistics to one function.
2159 additional_stats_tf1 : ROOT.TF1
2160 The function carrying the additional statistics as parameters
2166 if additional_stats_tf1
is None:
2172 lower_bound = ctypes.c_double()
2173 upper_bound = ctypes.c_double()
2174 fit_tf1.GetRange(lower_bound, upper_bound)
2175 title = fit_tf1.GetTitle()
2177 combined_formula = additional_stats_tf1.GetExpFormula().Data() +
'+' + fit_tf1.GetExpFormula().Data()
2178 combined_tf1 = ROOT.TF1(
"Combined", combined_formula, lower_bound.value, upper_bound.value)
2179 combined_tf1.SetTitle(title)
2182 chi2 = fit_tf1.GetChisquare()
2183 combined_tf1.SetChisquare(chi2)
2185 ndf = fit_tf1.GetNDF()
2186 combined_tf1.SetNDF(ndf)
2188 n_stats_parameters = additional_stats_tf1.GetNpar()
2191 cls.
copy_tf1_parameterscopy_tf1_parameters(fit_tf1, combined_tf1, offset=n_stats_parameters)
2197 """Copy the parameters of one TF1 to another.
2201 tf1_source : ROOT.TF1
2202 Take parameters from here
2203 tf1_target : ROOT.TF1
2205 offset : int, optional
2206 Index of the first target parameter to which to copy.
2208 n_parameters = tf1_source.GetNpar()
2211 lower_bound = ctypes.c_double()
2212 upper_bound = ctypes.c_double()
2214 for i_source
in range(n_parameters):
2215 parameter_name = tf1_source.GetParName(i_source)
2216 i_target = tf1_target.GetParNumber(parameter_name)
2220 for i_target
in range(tf1_target.GetNpar()):
2221 if parameter_name == tf1_target.GetParName(i_target):
2227 tf1_target.SetParameter(i_target,
2228 tf1_source.GetParameter(i_source))
2229 tf1_target.SetParError(i_target,
2230 tf1_source.GetParError(i_source))
2232 tf1_source.GetParLimits(i_source, lower_bound, upper_bound)
2233 tf1_target.SetParLimits(i_target, lower_bound.value, upper_bound.value)
2236 """Reassign the special attributes of the plot forwarding them to the ROOT plot."""
2253 """Sets the maximum of the vertical plotable range"""
2255 if isinstance(histogram, ROOT.TH1):
2256 histogram.SetMaximum(histogram.GetMaximum(maximum))
2258 histogram.SetMaximum(maximum)
2261 """Sets the minimum of the vertical plotable range"""
2263 if isinstance(histogram, ROOT.TH1):
2264 histogram.SetMinimum(histogram.GetMinimum(minimum))
2266 histogram.SetMinimum(minimum)
2270 """Set the style such that the additional stats entries are shown by the TBrowser"""
2271 belle2_validation_style_name =
"belle2_validation_style"
2272 belle2_validation_tstyle = ROOT.gROOT.GetStyle(belle2_validation_style_name)
2273 if not belle2_validation_tstyle:
2274 belle2_validation_tstyle = ROOT.TStyle(belle2_validation_style_name, belle2_validation_style_name)
2277 belle2_validation_tstyle.SetOptFit(opt_fit)
2280 belle2_validation_tstyle.SetOptStat(opt_stat)
2281 ROOT.gROOT.SetStyle(belle2_validation_style_name)
2286 belle2_validation_tstyle.cd()
2290 """Simple test method"""
2291 ValidationPlot.set_tstyle()
2294 normal_distributed_values = np.random.randn(1000)
2297 normal_distributed_values[i] = np.nan
2299 for i
in range(10, 20):
2300 normal_distributed_values[i] = np.inf
2302 for i
in range(20, 30):
2303 normal_distributed_values[i] = -np.inf
2306 validation_histogram.hist(normal_distributed_values)
2307 validation_histogram.title =
'A normal distribution'
2308 validation_histogram.xlabel =
'normal'
2309 validation_histogram.ylabel =
'frequency'
2310 validation_histogram.fit_gaus()
2314 cumulated_histogram.hist(normal_distributed_values, cumulation_direction=1)
2315 cumulated_histogram.title =
'A cumulated normal distribution'
2316 cumulated_histogram.xlabel =
'normal'
2317 cumulated_histogram.ylabel =
'cdf'
2318 cumulated_histogram.show()
2322 stackby = np.random.binomial(1.0, 0.40, 1000)
2323 stacked_validation_histogram =
ValidationPlot(
'test_stacked_hist')
2324 stacked_validation_histogram.hist(normal_distributed_values, stackby=stackby)
2327 x = np.random.randn(1000)
2328 y = 3 * np.random.randn(1000)
2329 ones = np.ones_like(x)
2332 x1 = np.where(stackby != 0, np.cos(angle) * ones, ones) * x + np.where(stackby != 0, np.sin(angle) * ones, ones) * y
2333 y1 = np.where(stackby != 0, np.sin(angle) * ones, ones) * x - np.where(stackby != 0, np.cos(angle) * ones, ones) * y
2335 stacked_validation_scatter =
ValidationPlot(
'test_stacked_scatter')
2336 stacked_validation_scatter.scatter(x1, y1, stackby=stackby)
2339 stacked_validation_profile =
ValidationPlot(
'test_stacked_profile')
2340 stacked_validation_profile.profile(x1, y1, stackby=stackby)
2343 stacked_validation_hist2d =
ValidationPlot(
'test_stacked_hist2d')
2344 stacked_validation_hist2d.hist2d(x1, y1, stackby=stackby)
2347 x = np.linspace(-1, 1, 1000)
2351 diagonal_plot.profile(x, y, bins=50)
2352 diagonal_plot.fit_line()
2356 cumulated_profile.profile(x, y, bins=50, cumulation_direction=1)
2358 tfile = ROOT.TFile(
'test.root',
'RECREATE')
2360 validation_histogram.write(tfile)
2362 with root_cd(
"expert")
as tdirectory1:
2363 diagonal_plot.write(tdirectory1)
2364 cumulated_profile.write(tdirectory1)
2365 cumulated_histogram.write(tdirectory1)
2367 with root_cd(
"stacked")
as tdirectory2:
2368 stacked_validation_histogram.write(tdirectory2)
2369 stacked_validation_scatter.write()
2370 stacked_validation_profile.write()
2371 stacked_validation_hist2d.write()
2375 tfile = ROOT.TFile(
'test.root')
2376 tBrowser = ROOT.TBrowser()
2377 tBrowser.BrowseObject(tfile)
2382 if __name__ ==
'__main__':
title
cached value of the title for this plot
def format_bin_label(value)
pvalue_warn
custom levels for pvalue warnings
def hist(self, xs, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, is_expert=True)
y_log
Indicator whether the y axes should be displayed as a log scale.
def create_stack(cls, histograms, name, reverse_stack, force_graph=False)
def create_additional_stats_tf1(cls, histogram)
description
description of the plot
contact
contact information for this plot
upper_bound
upper right corner of the histogram
def write(self, tdirectory=None)
def fit_gaus(self, z_score=None)
ylabel
default label for the histogram's Y axis
int very_sparse_dots_line_style_index
An index that refers to a dot spacing such that the line is almost invisible for scatter.
def unpack_2d_param(param)
def is_discrete(xs, max_n_unique=None)
_contact
Contact email address for display on the validation page.
pvalue_error
custom levels for pvalue errors
def grapherrors(self, xs_and_err, ys_and_err, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
def delete_tf1(cls, histogram)
_title
Title of the validation plot.
def create_1d(self, th1_factory, xs, ys=None, weights=None, bins=None, stackby=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None)
def description(self, description)
def __init__(self, name, referenceFileName=None)
def add_stats_entry(cls, histogram, label, value)
def contact(self, contact)
_xlabel
X axes label of the validation plot.
xlabel
cached value of the x-axis label for this plot
def set_tf1(cls, histogram, tf1)
def fill_into(self, plot, xs, ys=None, weights=None, filter=None)
def combine_fit_and_additional_stats(cls, fit_tf1, additional_stats_tf1)
def create(self, histogram_template, xs, ys=None, weights=None, stackby=None, cumulation_direction=None, reverse_stack=None)
def set_maximum(self, maximum)
def attach_attributes(self)
def add_nan_inf_stats(cls, histogram, name, xs)
def set_color(self, tobject, root_i_color)
def get_robust_mean_and_std(xs)
def cumulate(cls, histogram, cumulation_direction=None)
def copy_tf1_parameters(cls, tf1_source, tf1_target, offset=0)
def profile(self, xs, ys, weights=None, stackby=None, bins=None, lower_bound=None, upper_bound=None, y_binary=None, y_log=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False, cumulation_direction=None, gaus_z_score=None, is_expert=True)
_is_expert
per default all plots are expert and must be set to non-expert explicitly
def fit(self, formula, options, lower_bound=None, upper_bound=None, z_score=None)
_description
Description of the plot purpose for display on the validation page.
histograms
A list of the histograms that make up the plot.
def get_additional_stats(cls, histogram)
_check
Detailed check instructions for display on the validation page.
name
A unique name to be used as the name of the ROOT object to be generated.
def set_fit_tf1(cls, histogram, fit_tf1)
def set_minimum(self, minimum)
def fill_into_grouped(self, histogram_template, xs, ys=None, weights=None, groupbys=None, groupby_label="group")
plot
The main plot object, may contain one or more (in case of stacked plots) histograms.
def determine_range(self, xs, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
def set_additional_stats_tf1(cls, histogram)
def scatter(self, xs, ys, stackby=None, lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), max_n_data=100000, is_expert=True)
_ylabel
Y axes label of the validation plot.
referenceFileName
name of the reference file, if not None the binning will be read from there
def fill_into_tgrapherror(self, graph, xs, ys, filter=None)
def hist2d(self, xs, ys, weights=None, stackby=None, bins=(None, None), lower_bound=(None, None), upper_bound=(None, None), outlier_z_score=(None, None), include_exceptionals=(True, True), allow_discrete=(False, False), quantiles=None, is_expert=True)
lower_bound
lower left corner of the histogram
def determine_bin_range(self, xs, stackbys=None, n_bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True)
def determine_bin_edges(self, xs, stackbys=None, bins=None, lower_bound=None, upper_bound=None, outlier_z_score=None, include_exceptionals=True, allow_discrete=False)
def gaus_slice_fit(cls, th2, name, z_score=None)
check
cached value of the user-check action for this plot
def fill_into_th1(self, histogram, xs, ys=None, weights=None, filter=None)
def get_exceptional_values(xs)
def convert_tprofile_to_tgrapherrors(cls, tprofile, abs_x=False)
def fill_into_tgraph(self, graph, xs, ys, filter=None)
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)