18import matplotlib.pyplot
as plt
19import matplotlib.artist
20import matplotlib.figure
21import matplotlib.gridspec
22import matplotlib.colors
23import matplotlib.patches
24import matplotlib.ticker
25import matplotlib.patheffects
as PathEffects
28from basf2_mva_evaluation
import histogram
40plt.style.use(
"belle2")
45 Base class for all Plotters.
79 Creates a new figure and axis
if None is given, sets the default plot parameters
80 @param figure default draw figure which
is used
81 @param axis default draw axis which
is used
83 b2.B2INFO("Create new figure for class " + str(type(self)))
86 self.
figurefigure = matplotlib.figure.Figure(figsize=(12, 8), dpi=120)
125 self.
prop_cycler = itertools.cycle(plt.rcParams[
"axes.prop_cycle"])
129 Adds a new subplot to the figure, updates all other axes
130 according to the given gridspec
131 @param gridspecs gridspecs
for all axes including the new one
133 for gs, ax
in zip(gridspecs[:-1], self.
figurefigure.axes):
135 ax.set_subplotspec(gs)
141 Save the figure into a file
142 @param filename of the file
144 b2.B2INFO("Save figure for class " + str(type(self)))
145 from matplotlib.backends.backend_agg
import FigureCanvasAgg
as FigureCanvas
147 canvas.print_figure(filename, dpi=50)
152 Overrides default plot options for datapoint plot
153 @param plot_kwargs keyword arguments
for the plot function
160 Overrides default errorbar options for datapoint errorbars
161 @param errorbar_kwargs keyword arguments
for the errorbar function
168 Overrides default errorband options for datapoint errorband
169 @param errorband_kwargs keyword arguments for the fill_between function
176 Overrides default fill_between options for datapoint errorband
177 @param fill_kwargs keyword arguments
for the fill_between function
184 Plot the given datapoints with plot and errorbar, and make an errorband with fill_between
185 @param x coordinates of the data points
186 @param y coordinates of the data points
187 @param xerr symmetric error on x data points
188 @param yerr symmetric error on y data points
196 if plot_kwargs
is None or 'color' not in plot_kwargs:
198 color = color[
'color']
199 plot_kwargs[
'color'] = color
201 color = plot_kwargs[
'color']
202 color = matplotlib.colors.ColorConverter().to_rgb(color)
203 patch = matplotlib.patches.Patch(color=color, alpha=0.5)
204 patch.get_color = patch.get_facecolor
207 if plot_kwargs
is not None:
208 p, = axis.plot(x, y, rasterized=
True, **plot_kwargs)
211 if errorbar_kwargs
is not None and (xerr
is not None or yerr
is not None):
212 if 'color' not in errorbar_kwargs:
213 errorbar_kwargs[
'color'] = color
214 if 'ecolor' not in errorbar_kwargs:
215 errorbar_kwargs[
'ecolor'] = [0.5 * x
for x
in color]
220 if not isinstance(xerr, (numpy.ndarray, list)):
221 xerr = xerr*numpy.ones(len(x))
222 mask = numpy.logical_and.reduce([numpy.isfinite(v)
for v
in [x, y, xerr, yerr]])
225 x[mask], y[mask], xerr=numpy.where(
226 xerr[mask] < 0, 0.0, xerr[mask]), yerr=numpy.where(
227 yerr[mask] < 0, 0.0, yerr[mask]), rasterized=
True, **errorbar_kwargs)
230 if errorband_kwargs
is not None and yerr
is not None:
231 if 'color' not in errorband_kwargs:
232 errorband_kwargs[
'color'] = color
237 for _x, _y, _xe, _ye
in zip(x, y, xerr, yerr):
238 axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye, rasterized=
True,
241 f = axis.fill_between(x, y - yerr, y + yerr, interpolate=
True, rasterized=
True, **errorband_kwargs)
243 if fill_kwargs
is not None:
245 x = numpy.append(x, x[-1]+2*xerr[-1])
246 y = numpy.append(y, y[-1])
247 xerr = numpy.append(xerr, xerr[-1])
249 axis.fill_between(x-xerr, y, 0, rasterized=
True, **fill_kwargs)
251 return (tuple(patches), p, e, f)
253 def add(self, *args, **kwargs):
255 Add a new plot to this plotter
257 return NotImplemented
261 Finish plotting and set labels, legends
and stuff
263 return NotImplemented
267 Scale limits to increase distance to boundaries
278 Plots the purity and the efficiency over the cut value (
for cut choosing)
285 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
287 Add a new curve to the plot
288 @param data pandas.DataFrame containing all data
289 @param column which
is used to calculate efficiency
and purity
for different cuts
290 @param signal_mask boolean numpy.array defining which events are signal events
291 @param bckgrd_mask boolean numpy.array defining which events are background events
292 @param weight_column column
in data containing the weights
for each event
295 hists = histogram.Histograms(data, column, {'Signal': signal_mask,
'Background': bckgrd_mask}, weight_column=weight_column)
298 efficiency, efficiency_error = hists.get_efficiency([
'Signal'])
299 purity, purity_error = hists.get_purity([
'Signal'], [
'Background'])
301 efficiency, efficiency_error = hists.get_true_positives([
'Signal'])
302 purity, purity_error = hists.get_false_positives([
'Background'])
304 cuts = hists.bin_centers
308 numpy.nanmax([numpy.nanmax(efficiency), numpy.nanmax(purity), self.
ymaxymaxymax])
328 Sets limits, title, axis-labels and legend of the plot
332 self.axisaxis.set_title("Classification Plot")
333 self.
axisaxis.get_xaxis().set_label_text(
'Cut Value')
340 Plots the signal to noise ratio over the cut value (for cut choosing)
347 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
349 Add a new curve to the plot
350 @param data pandas.DataFrame containing all data
351 @param column which
is used to calculate signal to noise ratio
for different cuts
352 @param signal_mask boolean numpy.array defining which events are signal events
353 @param bckgrd_mask boolean numpy.array defining which events are background events
354 @param weight_column column
in data containing the weights
for each event
357 hists = histogram.Histograms(data, column, {'Signal': signal_mask,
'Background': bckgrd_mask}, weight_column=weight_column)
359 signal2noise, signal2noise_error = hists.get_signal_to_noise([
'Signal'], [
'Background'])
361 cuts = hists.bin_centers
365 numpy.nanmax([numpy.nanmax(signal2noise), self.
ymaxymaxymax])
375 Sets limits, title, axis-labels and legend of the plot
379 self.axisaxis.set_title("Signal to Noise Plot")
380 self.
axisaxis.get_xaxis().set_label_text(
'Cut Value')
387 Plots the purity over the efficiency also known as ROC curve
394 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
396 Add a new curve to the ROC plot
397 @param data pandas.DataFrame containing all data
398 @param column which
is used to calculate efficiency
and purity
for different cuts
399 @param signal_mask boolean numpy.array defining which events are signal events
400 @param bckgrd_mask boolean numpy.array defining which events are background events
401 @param weight_column column
in data containing the weights
for each event
403 hists = histogram.Histograms(data, column, {'Signal': signal_mask,
'Background': bckgrd_mask}, weight_column=weight_column)
404 efficiency, efficiency_error = hists.get_efficiency([
'Signal'])
405 purity, purity_error = hists.get_purity([
'Signal'], [
'Background'])
412 if label
is not None:
420 Sets limits, title, axis-labels and legend of the plot
424 self.axisaxis.set_title("ROC Purity Plot")
425 self.
axisaxis.get_xaxis().set_label_text(
'Efficiency')
426 self.
axisaxis.get_yaxis().set_label_text(
'Purity')
433 Plots the rejection over the efficiency also known as ROC curve
440 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
442 Add a new curve to the ROC plot
443 @param data pandas.DataFrame containing all data
444 @param column which is used to calculate efficiency and rejection for different cuts
445 @param signal_mask boolean numpy.array defining which events are signal events
446 @param bckgrd_mask boolean numpy.array defining which events are background events
447 @param weight_column column
in data containing the weights
for each event
449 hists = histogram.Histograms(data, column, {'Signal': signal_mask,
'Background': bckgrd_mask}, weight_column=weight_column)
450 efficiency, efficiency_error = hists.get_efficiency([
'Signal'])
451 rejection, rejection_error = hists.get_efficiency([
'Background'])
452 rejection = 1 - rejection
453 if isinstance(efficiency, int)
and not isinstance(rejection, int):
454 efficiency = numpy.array([efficiency] * len(rejection))
455 elif isinstance(rejection, int)
and not isinstance(efficiency, int):
456 rejection = numpy.array([rejection] * len(efficiency))
457 elif isinstance(rejection, int)
and isinstance(efficiency, int):
458 efficiency = numpy.array([efficiency])
459 rejection = numpy.array([rejection])
464 auc = numpy.abs(numpy.trapz(rejection, efficiency))
468 if label
is not None:
476 Sets limits, title, axis-labels and legend of the plot
480 self.axisaxis.set_title("ROC Rejection Plot")
481 self.
axisaxis.get_xaxis().set_label_text(
'Signal Efficiency')
482 self.
axisaxis.get_yaxis().set_label_text(
'Background Rejection')
489 Plots multiple other plots into a grid with up to three columns
496 def __init__(self, cls, number_of_plots, figure=None):
498 Creates a new figure if None is given, sets the default plot parameters
499 @param figure default draw figure which
is used
508 if number_of_plots == 1:
509 gs = matplotlib.gridspec.GridSpec(1, 1)
510 elif number_of_plots == 2:
511 gs = matplotlib.gridspec.GridSpec(1, 2)
512 elif number_of_plots == 3:
513 gs = matplotlib.gridspec.GridSpec(1, 3)
515 gs = matplotlib.gridspec.GridSpec(int(numpy.ceil(number_of_plots / 3)), 3)
523 def add(self, i, *args, **kwargs):
525 Call add function of ith subplot
526 @param i position of the subplot
532 Sets limits, title, axis-labels and legend of the plot
541 Plots the purity in each bin over the classifier output.
548 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None):
550 Add a new curve to the Diagonal plot
551 @param data pandas.DataFrame containing all data
552 @param column which
is used to calculate purity
for different cuts
553 @param signal_mask boolean numpy.array defining which events are signal events
554 @param bckgrd_mask boolean numpy.array defining which events are background events
555 @param weight_column column
in data containing the weights
for each event
557 hists = histogram.Histograms(data, column, {'Signal': signal_mask,
'Background': bckgrd_mask}, weight_column=weight_column)
558 purity, purity_error = hists.get_purity_per_bin([
'Signal'], [
'Background'])
571 Sets limits, title, axis-labels and legend of the plot
574 self.axisaxis.plot((0.0, 1.0), (0.0, 1.0), color='black')
577 self.
axisaxis.set_title(
"Diagonal Plot")
578 self.
axisaxis.get_xaxis().set_label_text(
'Classifier Output')
579 self.
axisaxis.get_yaxis().set_label_text(
'Purity Per Bin')
586 Plots distribution of a quantity
589 def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
590 keep_first_binning=False, range_in_std=None):
592 Creates a new figure and axis
if None is given, sets the default plot parameters
593 @param figure default draw figure which
is used
594 @param axis default draw axis which
is used
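595 @param normed_to_all_entries true if histograms should be divided by the sum of their entries before drawing
@param normed_to_bin_width true if histograms should be divided by their bin widths before drawing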
596 @param keep_first_binning use the binning of the first distribution
for further plots
597 @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
622 def add(self, data, column, mask=None, weight_column=None, label=None):
624 Add a new distribution to the plots
625 @param data pandas.DataFrame containing all data
626 @param column which
is used to calculate distribution histogram
627 @param mask boolean numpy.array defining which events are used
for the histogram
628 @param weight_column column
in data containing the weights
for each event
631 mask = numpy.ones(len(data)).astype(
'bool')
637 bins=bins, equal_frequency=
False, range_in_std=self.
range_in_std)
640 hist, hist_error = hists.get_hist(
'Total')
643 normalization = float(numpy.sum(hist))
644 hist = hist / normalization
645 hist_error = hist_error / normalization
648 hist = hist / hists.bin_widths
649 hist_error = hist_error / hists.bin_widths
661 appendix =
' No data to plot!'
671 Sets limits, title, axis-labels and legend of the plot
673 self.axisaxis.set_title("Distribution Plot")
681 self.
axisaxis.text(0.36, 0.5,
'No data to plot', fontsize=60, color=
'black')
690 self.
axisaxis.get_yaxis().set_label_text(
'# Entries per Bin / (# Entries * Bin Width)')
692 self.
axisaxis.get_yaxis().set_label_text(
'# Entries per Bin / # Entries')
694 self.
axisaxis.get_yaxis().set_label_text(
'# Entries per Bin / Bin Width')
696 self.
axisaxis.get_yaxis().set_label_text(
'# Entries per Bin')
708 def __init__(self, figure=None, axis=None, x_axis_label=None):
710 Creates a new figure and axis
if None is given, sets the default plot parameters
711 @param figure default draw figure which
is used
712 @param axis default draw axis which
is used
714 super().__init__(figure=figure, axis=axis)
719 def add(self, data, column, mask=None, weight_column=None):
721 Add a new boxplot to the plots
722 @param data pandas.DataFrame containing all data
723 @param column which
is used to calculate boxplot quantities
724 @param mask boolean numpy.array defining which events are used for the boxplot
725 @param weight_column column in data containing the weights for each event (currently ignored; a warning is issued)
728 mask = numpy.ones(len(data)).astype(
'bool')
729 x = data[column][mask]
730 if weight_column
is not None:
732 b2.B2WARNING(
"Weights are currently not used in boxplot, due to limitations in matplotlib")
735 b2.B2WARNING(
"Ignore empty boxplot.")
739 p = self.
axisaxis.boxplot(x, sym=
'k.', whis=1.5, vert=
False, patch_artist=
True, showmeans=
True, widths=1,
740 boxprops=dict(facecolor=
'blue', alpha=0.5), showfliers=
False,
749 self.axisaxis.text(0.1, 0.9, (r'$ \mu = {:.2f}$' +
'\n' +
r'$median = {:.2f}$').format(x.mean(), x.median()),
750 fontsize=28, verticalalignment=
'top', horizontalalignment=
'left', transform=self.
axisaxis.transAxes)
751 self.
axisaxis.text(0.4, 0.9, (
r'$ \sigma = {:.2f}$' +
'\n' +
r'$IQD = {:.2f}$').format(x.std(),
752 x.quantile(0.75) - x.quantile(0.25)),
753 fontsize=28, verticalalignment=
'top', horizontalalignment=
'left', transform=self.
axisaxis.transAxes)
754 self.
axisaxis.text(0.7, 0.9, (
r'$min = {:.2f}$' +
'\n' +
r'$max = {:.2f}$').format(x.min(), x.max()),
755 fontsize=28, verticalalignment=
'top', horizontalalignment=
'left', transform=self.
axisaxis.transAxes)
762 Sets limits, title, axis-labels and legend of the plot
764 matplotlib.artist.setp(self.axisaxis.get_yaxis(), visible=False)
772 Plots the difference between two histograms
787 def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
789 Creates a new figure and axis
if None is given, sets the default plot parameters
790 @param figure default draw figure which
is used
791 @param axis default draw axis which
is used
792 @param normed normalize minuend
and subtrahend before comparing them
793 @param shift_to_zero mean difference
is shifted to zero, to remove constant offset due to e.g. different sample sizes
805 def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None):
807 Add a new difference plot
808 @param data pandas.DataFrame containing all data
809 @param column which
is used to calculate distribution histogram
810 @param minuend_mask boolean numpy.array defining which events are
for the minuend histogram
811 @param subtrahend_mask boolean numpy.array defining which events are
for the subtrahend histogram
812 @param weight_column column
in data containing the weights
for each event
813 @param label label for the legend; if None, the column name is used
815 hists = histogram.Histograms(data, column, {'Minuend': minuend_mask,
'Subtrahend': subtrahend_mask},
816 weight_column=weight_column, equal_frequency=
False)
817 minuend, minuend_error = hists.get_hist(
'Minuend')
818 subtrahend, subtrahend_error = hists.get_hist(
'Subtrahend')
822 difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
823 minuend = minuend / numpy.sum(minuend)
824 subtrahend = subtrahend / numpy.sum(subtrahend)
825 difference = minuend - subtrahend
828 difference = difference - numpy.mean(difference)
834 p = self.
_plot_datapoints(self.
axisaxis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
845 Sets limits, title, axis-labels and legend of the plot
851 self.
axisaxis.set_title(
"Difference Plot")
852 self.
axisaxis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
854 self.
axisaxis.get_yaxis().set_label_text(
'Diff.')
861 Create TMVA-like overtraining control plot for a classification training
875 Creates a new figure if None is given, sets the default plot parameters
876 @param figure default draw figure which
is used
885 gs = matplotlib.gridspec.GridSpec(5, 1)
895 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
897 Add a new overtraining plot. It is recommended to draw only one overtraining plot at a time,
898 otherwise there are too many curves in the plot to recognize anything.
899 @param data pandas.DataFrame containing all data
900 @param column which
is used to calculate distribution histogram
901 @param train_mask boolean numpy.array defining which events are training events
902 @param test_mask boolean numpy.array defining which events are test events
903 @param signal_mask boolean numpy.array defining which events are signal events
904 @param bckgrd_mask boolean numpy.array defining which events are background events
905 @param weight_column column
in data containing the weights
for each event
912 distribution.add(data, column, test_mask & signal_mask, weight_column)
913 distribution.add(data, column, test_mask & bckgrd_mask, weight_column)
915 distribution.set_plot_options(
916 {
'color': distribution.plots[0][0][0].get_color(),
'linestyle':
'-',
'lw': 4,
'drawstyle':
'steps-mid'})
917 distribution.set_fill_options({
'color': distribution.plots[0][0][0].get_color(),
'alpha': 0.5,
'step':
'post'})
918 distribution.set_errorbar_options(
None)
919 distribution.set_errorband_options(
None)
920 distribution.add(data, column, train_mask & signal_mask, weight_column)
921 distribution.set_plot_options(
922 {
'color': distribution.plots[1][0][0].get_color(),
'linestyle':
'-',
'lw': 4,
'drawstyle':
'steps-mid'})
923 distribution.set_fill_options({
'color': distribution.plots[1][0][0].get_color(),
'alpha': 0.5,
'step':
'post'})
924 distribution.add(data, column, train_mask & bckgrd_mask, weight_column)
926 distribution.labels = [
'Test-Signal',
'Test-Background',
'Train-Signal',
'Train-Background']
927 distribution.finish()
929 self.
plot_kwargs[
'color'] = distribution.plots[0][0][0].get_color()
931 difference_signal.set_plot_options(self.
plot_kwargs)
934 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column)
935 self.
axis_d1axis_d1.set_xlim((difference_signal.xmin, difference_signal.xmax))
936 self.
axis_d1axis_d1.set_ylim((difference_signal.ymin, difference_signal.ymax))
937 difference_signal.plots = difference_signal.labels = []
938 difference_signal.finish(line_color=distribution.plots[0][0][0].get_color())
940 self.
plot_kwargs[
'color'] = distribution.plots[1][0][0].get_color()
942 difference_bckgrd.set_plot_options(self.
plot_kwargs)
945 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column)
946 self.
axis_d2axis_d2.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
947 self.
axis_d2axis_d2.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
948 difference_bckgrd.plots = difference_bckgrd.labels = []
949 difference_bckgrd.finish(line_color=distribution.plots[1][0][0].get_color())
954 if len(data[column][train_mask & signal_mask]) == 0
or len(data[column][test_mask & signal_mask]) == 0:
955 b2.B2WARNING(
"Cannot calculate kolmogorov smirnov test for signal due to missing data")
957 ks = scipy.stats.ks_2samp(data[column][train_mask & signal_mask], data[column][test_mask & signal_mask])
958 props = dict(boxstyle=
'round', edgecolor=
'gray', facecolor=
'white', linewidth=0.1, alpha=0.5)
959 self.
axis_d1axis_d1.text(0.1, 0.9,
r'signal (train - test) difference $p={:.2f}$'.format(ks[1]), bbox=props,
960 verticalalignment=
'top', horizontalalignment=
'left', transform=self.
axis_d1axis_d1.transAxes)
961 if len(data[column][train_mask & bckgrd_mask]) == 0
or len(data[column][test_mask & bckgrd_mask]) == 0:
962 b2.B2WARNING(
"Cannot calculate kolmogorov smirnov test for background due to missing data")
964 ks = scipy.stats.ks_2samp(data[column][train_mask & bckgrd_mask], data[column][test_mask & bckgrd_mask])
965 props = dict(boxstyle=
'round', edgecolor=
'gray', facecolor=
'white', linewidth=0.1, alpha=0.5)
966 self.
axis_d2axis_d2.text(0.1, 0.9,
r'background (train - test) difference $p={:.2f}$'.format(ks[1]),
968 verticalalignment=
'top', horizontalalignment=
'left', transform=self.
axis_d2axis_d2.transAxes)
970 b2.B2WARNING(
"Cannot calculate kolmogorov smirnov test please install scipy!")
976 Sets limits, title, axis-labels and legend of the plot
981 matplotlib.artist.setp(self.
axisaxisaxisaxis.get_xticklabels(), visible=
False)
982 matplotlib.artist.setp(self.
axis_d1axis_d1.get_xticklabels(), visible=
False)
985 self.
axis_d2axis_d2.get_xaxis().set_label_text(
'Classifier Output')
991 Plots distribution of a quantity including boxplots
997 def __init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None):
999 Creates a new figure and axis
if None is given, sets the default plot parameters
1000 @param figure default draw figure which
is used
1001 @param axis default draw axis which
is used
1002 @param normed true
if the histograms should be normed before drawing
1003 @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
1017 def add(self, data, column, mask=None, weight_column=None, label=None):
1019 Add a new distribution plot, with additional information like a boxplot compared to
1020 the ordinary Distribution plot.
1021 @param data pandas.DataFrame containing all data
1022 @param column which
is used to calculate distribution histogram
1023 @param mask boolean numpy.array defining which events are used
for the distribution histogram
1024 @param weight_column column
in data containing the weights
for each event
1032 gs = matplotlib.gridspec.GridSpec(4 * n, 1)
1033 gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i
in range(n)]
1039 mask = mask & (data[column] > (mean - self.
range_in_std * std)) & (data[column] < (mean + self.
range_in_std * std))
1041 box.add(data, column, mask, weight_column)
1042 if len(box.plots) > 0:
1043 box.plots[0][
'boxes'][0].set_facecolor(self.
distribution.plots[-1][0][0].get_color())
1051 Sets limits, title, axis-labels and legend of the plot
1054 matplotlib.artist.setp(self.axisaxis.get_xticklabels(), visible=False)
1055 self.
axisaxis.get_xaxis().set_label_text(
'')
1057 matplotlib.artist.setp(box_axis.get_xticklabels(), visible=
False)
1058 box_axis.set_title(
"")
1059 box_axis.get_xaxis().set_label_text(
'')
1061 self.
axisaxis.set_title(
"Distribution Plot")
1063 loc=
'best', fancybox=
True, framealpha=0.5)
1069 Plots change of a distribution of a quantity depending on the cut on a classifier
1082 Creates a new figure if None is given, sets the default plot parameters
1083 @param figure default draw figure which
is used
1092 gs = matplotlib.gridspec.GridSpec(3, 2)
1102 def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
1104 Add a new correlation plot.
1105 @param data pandas.DataFrame containing all data
1106 @param column which
is used to calculate distribution histogram
1107 @param cut_column which
is used to calculate cut on the other quantity defined by column
1108 @param quantiles list of quantiles between 0
and 100, defining the different cuts
1109 @param weight_column column
in data containing the weights
for each event
1111 if len(data[cut_column]) == 0:
1112 b2.B2WARNING(
"Ignore empty Correlation.")
1117 for i, (l, m)
in enumerate([(
'.', signal_mask | bckgrd_mask), (
'S', signal_mask), (
'B', bckgrd_mask)]):
1119 if weight_column
is not None:
1120 weights = numpy.array(data[weight_column][m])
1122 weights = numpy.ones(len(data[column][m]))
1124 xrange = numpy.percentile(data[column][m], [5, 95])
1126 colormap = plt.get_cmap(
'coolwarm')
1127 tmp, x = numpy.histogram(data[column][m], bins=100,
1128 range=xrange, density=
True, weights=weights)
1129 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1130 axes[i].
plot(bin_center, tmp, color=
'black', lw=1)
1132 for quantil
in numpy.arange(5, 100, 5):
1133 cut = numpy.percentile(data[cut_column][m], quantil)
1134 sel = data[cut_column][m] >= cut
1135 y, x = numpy.histogram(data[column][m][sel], bins=100,
1136 range=xrange, density=
True, weights=weights[sel])
1137 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1138 axes[i].fill_between(bin_center, tmp, y, color=colormap(quantil / 100.0))
1141 axes[i].set_ylim(bottom=0)
1144 axes[i].set_title(
r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(l, flatness_score))
1149 Sets limits, title, axis-labels and legend of the plot
1156 Plots multivariate distribution using TSNE algorithm
1159 def add(self, data, columns, *masks):
1161 Add a new TSNE plot.
1162 @param data pandas.DataFrame containing all data
1163 @param columns which are used as input to the TSNE algorithm
1164 @param masks different classes to show in TSNE
1168 import sklearn.manifold
1169 model = sklearn.manifold.TSNE(n_components=2, random_state=0)
1170 data = numpy.array([data[column]
for column
in columns]).T
1173 data = numpy.array([data[column][mask]
for column
in columns]).T
1174 data = model.transform(data)
1175 self.
axisaxis.scatter(data[:, 0], data[:, 1], rasterized=
True)
1177 print(
"Cannot create TSNE plot. Install sklearn if you want it")
1182 Sets limits, title, axis-labels and legend of the plot
1189 Plots importance matrix
1192 def add(self, data, columns, variables):
1194 Add a new importance matrix plot.
1195 @param data pandas.DataFrame containing all data
1196 @param columns columns of data to show; each is rescaled to the range 0 to 100 and labels the x axis
@param variables labels for the y axis ticks
1201 width = (numpy.max(x) - numpy.min(x))
1203 return numpy.zeros(x.shape)
1204 return (x - numpy.min(x)) / width * 100
1206 importance_matrix = numpy.vstack([norm(data[column])
for column
in columns]).T
1207 importance_heatmap = self.
axisaxis.pcolor(importance_matrix, cmap=plt.cm.RdBu, vmin=0.0, vmax=100,
1211 self.
axisaxis.set_yticks(numpy.arange(importance_matrix.shape[0]) + 0.5, minor=
False)
1212 self.
axisaxis.set_xticks(numpy.arange(importance_matrix.shape[1]) + 0.5, minor=
False)
1214 self.
axisaxis.set_xticklabels(columns, minor=
False, rotation=90)
1215 self.
axisaxis.set_yticklabels(variables, minor=
False)
1219 for y
in range(importance_matrix.shape[0]):
1220 for x
in range(importance_matrix.shape[1]):
1221 txt = self.
axisaxis.text(x + 0.5, y + 0.5, f
'{importance_matrix[y, x]:.0f}',
1223 horizontalalignment=
'center',
1224 verticalalignment=
'center',
1226 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground=
'k')])
1228 cb = self.
figurefigure.colorbar(importance_heatmap, ticks=[0.0, 100], orientation=
'vertical')
1229 cb.ax.set_yticklabels([
'low',
'high'])
1232 self.
axisaxis.set_ylim(0, importance_matrix.shape[0])
1240 Sets limits, title, axis-labels and legend of the plot
1247 Plots correlation matrix
1258 Creates a new figure if None is given, sets the default plot parameters
1259 @param figure default draw figure which
is used
1268 gs = matplotlib.gridspec.GridSpec(8, 2)
1280 def add(self, data, columns, signal_mask, bckgrd_mask):
1282 Add a new correlation plot.
1283 @param data pandas.DataFrame containing all data
1284 @param columns which are used to calculate the correlations
1286 signal_corr = numpy.corrcoef(numpy.vstack([data[column][signal_mask] for column
in columns])) * 100
1287 bckgrd_corr = numpy.corrcoef(numpy.vstack([data[column][bckgrd_mask]
for column
in columns])) * 100
1289 signal_heatmap = self.
signal_axissignal_axis.pcolor(signal_corr, cmap=plt.cm.RdBu, vmin=-100.0, vmax=100.0)
1311 for y
in range(signal_corr.shape[0]):
1312 for x
in range(signal_corr.shape[1]):
1315 horizontalalignment=
'center',
1316 verticalalignment=
'center',
1318 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground=
'k')])
1320 for y
in range(bckgrd_corr.shape[0]):
1321 for x
in range(bckgrd_corr.shape[1]):
1324 horizontalalignment=
'center',
1325 verticalalignment=
'center',
1327 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground=
'k')])
1330 cb.solids.set_rasterized(
True)
1331 cb.ax.set_xticklabels([
'negative',
'uncorrelated',
'positive'])
1345 Sets limits, title, axis-labels and legend of the plot
1351if __name__ ==
'__main__':
1353 def get_data(N, columns):
1355 Creates fake data for example plots
1358 n = len(columns) - 1
1359 xs = numpy.random.normal(0, size=(N, n))
1360 xb = numpy.random.normal(1, size=(N, n))
1363 data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
1364 return data.reindex(numpy.random.permutation(data.index))
1368 seaborn.set(font_scale=3)
1369 seaborn.set_style(
'whitegrid')
1373 data = get_data(N, columns=[
'FastBDT',
'NeuroBayes',
'isSignal'])
1375 data.type.iloc[:N / 2] =
'Train'
1376 data.type.iloc[N / 2:] =
'Test'
1379 p.add(data,
'FastBDT')
1381 p.save(
'box_plot.png')
1384 p.add(data,
'FastBDT')
1385 p.add(data,
'NeuroBayes')
1387 p.save(
'verbose_distribution_plot.png')
1390 p.add(data,
'FastBDT', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1391 p.add(data,
'NeuroBayes', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1393 p.save(
'roc_purity_plot.png')
1396 p.add(data,
'FastBDT', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1397 p.add(data,
'NeuroBayes', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1399 p.save(
'roc_rejection_plot.png')
1402 p.add(data,
'FastBDT', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1403 p.add(data,
'NeuroBayes', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1405 p.save(
'diagonal_plot.png')
1408 p.add(data,
'FastBDT')
1409 p.add(data,
'NeuroBayes')
1411 p.save(
'distribution_plot.png')
1414 p.add(data,
'FastBDT', data[
'type'] ==
'Train', data[
'type'] ==
'Test')
1415 p.add(data,
'NeuroBayes', data[
'type'] ==
'Train', data[
'type'] ==
'Test')
1417 p.save(
'difference_plot.png')
1420 p.add(data,
'FastBDT', data[
'type'] ==
'Train', data[
'type'] ==
'Test', data[
'isSignal'] == 1, data[
'isSignal'] == 0)
1422 p.save(
'overtraining_plot.png')
1425 p.add(data,
'FastBDT',
'NeuroBayes', [0, 20, 40, 60, 80, 100], data[
'isSignal'] == 0)
1427 p.save(
'correlation_plot.png')
1430 data[
'FastBDT2'] = data[
'FastBDT']**2
1431 data[
'NeuroBayes2'] = data[
'NeuroBayes']**2
1432 data[
'FastBDT3'] = data[
'FastBDT']**3
1433 data[
'NeuroBayes3'] = data[
'NeuroBayes']**3
1434 p.add(data, [
'FastBDT',
'NeuroBayes',
'FastBDT2',
'NeuroBayes2',
'FastBDT3',
'NeuroBayes3'])
1436 p.save(
'correlation_matrix.png')
def calculate_flatness(f, p, w=None)
x_axis_label
Label on x axis.
def __init__(self, figure=None, axis=None, x_axis_label=None)
def add(self, data, column, mask=None, weight_column=None)
signal_axis
add signal subplot
def add(self, data, columns, signal_mask, bckgrd_mask)
colorbar_axis
Colorbar axis contains the colorbar.
None bckgrd_axis
Axis which shows the correlation of the background samples.
def __init__(self, figure=None)
None figure
figure which is used to draw
None signal_axis
Main axis which shows the correlation of the signal samples.
bckgrd_axis
add background subplot
axis
Usual axis object which every Plotter object needs, here it is just a dummy.
def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None)
axis_d1
define second subplot
None axis_d1
Axis which shows shape of signal.
None axis
Main axis which is used to draw.
def __init__(self, figure=None)
axis_d2
define third subplot
None figure
figure which is used to draw
None axis_d2
Axis which shows shape of background.
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None)
x_axis_label
Label on x axis.
shift_to_zero
Mean difference is shifted to zero (removes constant offset) if this is true.
def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False)
def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None)
def finish(self, line_color='black')
normed
Minuend and subtrahend are normalized before comparing them if this is true.
def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False, keep_first_binning=False, range_in_std=None)
def add(self, data, column, mask=None, weight_column=None, label=None)
keep_first_binning
Keep the first binning if the user requests it.
normed_to_all_entries
Normalize histograms to all entries before drawing them.
first_binning
first binning
range_in_std
Show only a certain range in terms of standard deviations of the data.
normed_to_bin_width
Normalize histograms to the bin width before drawing them.
def add(self, data, columns, variables)
def add(self, i, *args, **kwargs)
def __init__(self, cls, number_of_plots, figure=None)
None figure
figure which is used to draw
sub_plots
the subplots which are displayed in the grid
axis
the axis of the first subplot
axis_d1
define second subplot
def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
None axis_d1
Axis which shows the difference between training and test signal.
None axis
Main axis which is used to draw.
def __init__(self, figure=None)
axis_d2
define third subplot
None figure
figure which is used to draw
None axis_d2
Axis which shows the difference between training and test background.
def finish(self, *args, **kwargs)
fill_kwargs
Default keyword arguments for fill_between function.
None ymin
Minimum y value.
def set_errorband_options(self, errorband_kwargs={ 'alpha':0.5})
plots
create empty list for plots
None ymax
Maximum y value.
errorband_kwargs
Default keyword arguments for the errorband, which is drawn with the fill_between function.
None axis
Main axis which is used to draw.
def add(self, *args, **kwargs)
None xmin
Minimum x value.
def set_fill_options(self, fill_kwargs=None)
def __init__(self, figure=None, axis=None)
None figure
figure which is used to draw
None plots
Plots added to the axis so far.
prop_cycler
Property cycler used to give plots unique colors.
errorbar_kwargs
Default keyword arguments for errorbar function.
labels
create empty list for labels
axis
divide figure into subplots
def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None)
def set_errorbar_options(self, errorbar_kwargs={ 'fmt':'.', 'elinewidth':3, 'alpha':1})
Overrides default errorbar options for datapoint errorbars.
None labels
Labels of the plots added so far.
def add_subplot(self, gridspecs)
def set_plot_options(self, plot_kwargs={ 'linestyle':''})
plot_kwargs
Default keyword arguments for plot function.
None xmax
Maximum x value.
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
def add(self, data, columns, *masks)
def add(self, data, column, mask=None, weight_column=None, label=None)
distribution
The distribution plot.
range_in_std
Show only a certain range in terms of standard deviations of the data.
None box_axes
Axes for the boxplots.
box_axes
create empty list for box axes
normed
Normalize histograms before drawing them.
def __init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None)
def weighted_mean_and_std(x, w)