from basf2 import B2INFO, B2WARNING
import basf2_mva_evaluation.histogram as histogram
import matplotlib.ticker
import matplotlib.patches
import matplotlib.colors
import matplotlib.gridspec
import matplotlib.figure
import matplotlib.artist
import matplotlib.pyplot as plt
# Global Matplotlib style shared by every plotter defined in this module.
matplotlib.rcParams.update({'font.size': 40})
# All labels are rendered through LaTeX, which is why they use \rm, \frac, ...
matplotlib.rcParams['text.usetex'] = True
# The preamble has to be a single string: passing a list is deprecated since
# Matplotlib 3.3 and rejected by later releases, while a plain string is
# accepted by old and new versions alike.
matplotlib.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"
class Plotter:
    """
    Base class for all Plotters.

    Subclasses implement add() to draw curves onto self.axis and finish() to
    set limits, labels and the legend. The drawing options are stored as
    keyword-argument dictionaries; setting one of them to None disables the
    corresponding drawing step in _plot_datapoints().
    """

    def __init__(self, figure=None, axis=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        """
        B2INFO("Create new figure for class " + str(type(self)))
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(False)
        else:
            self.figure = figure

        if axis is None:
            self.axis = self.figure.add_subplot(1, 1, 1)
        else:
            self.axis = axis

        # Handles returned by _plot_datapoints() and their legend labels,
        # filled by the add() implementations of the subclasses.
        self.plots = []
        self.labels = []
        # Axis limits, widened by every add() call
        self.xmin, self.xmax = float(0), float(1)
        self.ymin, self.ymax = float(0), float(1)
        # Relative margins applied by scale_limits().
        # NOTE(review): the initial values were not visible in the listing this
        # block was restored from; 0.0 makes scale_limits() a no-op - confirm.
        self.yscale = 0.0
        self.xscale = 0.0

        # Keyword arguments for axis.plot, axis.errorbar, the error band and
        # axis.fill_between; populated with their defaults just below.
        self.plot_kwargs = None
        self.errorbar_kwargs = None
        self.errorband_kwargs = None
        self.fill_kwargs = None

        self.set_plot_options()
        self.set_errorbar_options()
        self.set_errorband_options()
        self.set_fill_options()

    def add_subplot(self, gridspecs):
        """
        Adds a new subplot to the figure, updates all other axes
        according to the given gridspec
        @param gridspecs gridspecs for all axes including the new one
        @return the newly created axis, which shares its x-axis with self.axis
        """
        for gs, ax in zip(gridspecs[:-1], self.figure.axes):
            ax.set_position(gs.get_position(self.figure))
            ax.set_subplotspec(gs)
        axis = self.figure.add_subplot(gridspecs[-1], sharex=self.axis)
        return axis

    def save(self, filename):
        """
        Save the figure into a file
        @param filename of the file
        """
        B2INFO("Save figure for class " + str(type(self)))
        # Imported lazily so that the Agg backend is only loaded when a figure is written
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        canvas = FigureCanvas(self.figure)
        canvas.print_figure(filename, dpi=50)
        return self

    def set_plot_options(self, plot_kwargs={'linestyle': ''}):
        """
        Overrides default plot options for datapoint plot
        @param plot_kwargs keyword arguments for the plot function, None disables the plot
        """
        # The (shared) default dictionary is copied, never mutated
        self.plot_kwargs = copy.copy(plot_kwargs)
        return self

    def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
        """
        Overrides default errorbar options for datapoint errorbars
        @param errorbar_kwargs keyword arguments for the errorbar function, None disables the errorbars
        """
        self.errorbar_kwargs = copy.copy(errorbar_kwargs)
        return self

    def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
        """
        Overrides default errorband options for datapoint errorband
        @param errorband_kwargs keyword arguments for the fill_between function, None disables the errorband
        """
        self.errorband_kwargs = copy.copy(errorband_kwargs)
        return self

    def set_fill_options(self, fill_kwargs=None):
        """
        Overrides default fill_between options for the area below the datapoints
        @param fill_kwargs keyword arguments for the fill_between function, None disables the fill
        """
        self.fill_kwargs = copy.copy(fill_kwargs)
        return self

    def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
        """
        Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
        @param axis axis the datapoints are drawn on
        @param x coordinates of the data points
        @param y coordinates of the data points
        @param xerr symmetric error on x data points
        @param yerr symmetric error on y data points
        @return tuple (handles, p, e, f): legend handles, the result of axis.plot,
                axis.errorbar and axis.fill_between (None for steps which were skipped)
        """
        p = e = f = None
        plot_kwargs = copy.copy(self.plot_kwargs)
        errorbar_kwargs = copy.copy(self.errorbar_kwargs)
        errorband_kwargs = copy.copy(self.errorband_kwargs)
        fill_kwargs = copy.copy(self.fill_kwargs)

        if plot_kwargs is not None and 'color' in plot_kwargs:
            color = plot_kwargs['color']
        else:
            # Take the next color of the axis color cycle (private Matplotlib attribute)
            color = next(axis._get_lines.prop_cycler)['color']
            # plot_kwargs may be None (plotting disabled); the color is then
            # only used for the error bars / error band below.
            if plot_kwargs is not None:
                plot_kwargs['color'] = color
        color = matplotlib.colors.ColorConverter().to_rgb(color)
        patch = matplotlib.patches.Patch(color=color, alpha=0.7)
        # Callers read the curve color via plots[i][0][0].get_color()
        patch.get_color = patch.get_facecolor
        patches = [patch]

        if plot_kwargs is not None:
            p, = axis.plot(x, y, **plot_kwargs)
            patches.append(p)

        if errorbar_kwargs is not None and (xerr is not None or yerr is not None):
            if 'color' not in errorbar_kwargs:
                errorbar_kwargs['color'] = color
            if 'ecolor' not in errorbar_kwargs:
                # Darker shade of the curve color
                errorbar_kwargs['ecolor'] = [0.4 * channel for channel in color]
            # NOTE(review): this overrides any 'elinewidth' configured through
            # set_errorbar_options(); kept as in the original - confirm it is intended.
            errorbar_kwargs['elinewidth'] = 5
            e = axis.errorbar(x, y, xerr=xerr, yerr=yerr, **errorbar_kwargs)
            patches.append(e)

        if errorband_kwargs is not None and yerr is not None:
            if 'color' not in errorband_kwargs:
                errorband_kwargs['color'] = color
            if xerr is not None:
                # Broadcast scalar errors to arrays so that they can be zipped
                xerr = x + xerr - x
                yerr = y + yerr - y
                for _x, _y, _xe, _ye in zip(x, y, xerr, yerr):
                    axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye,
                                                                **errorband_kwargs))
            else:
                f = axis.fill_between(x, y - yerr, y + yerr, interpolate=True, **errorband_kwargs)

        if fill_kwargs is not None:
            axis.fill_between(x, y, 0, **fill_kwargs)

        return (tuple(patches), p, e, f)

    def add(self, *args, **kwargs):
        """
        Add a new plot to this plotter
        """
        return NotImplemented

    def finish(self, *args, **kwargs):
        """
        Finish plotting and set labels, legends and stuff
        """
        return NotImplemented

    def scale_limits(self):
        """
        Scale limits to increase distance to boundaries
        """
        # copysign keeps the margin pointing away from the data for negative limits too
        self.ymin *= 1.0 - math.copysign(self.yscale, self.ymin)
        self.ymax *= 1.0 + math.copysign(self.yscale, self.ymax)
        self.xmin *= 1.0 - math.copysign(self.xscale, self.xmin)
        self.xmax *= 1.0 + math.copysign(self.xscale, self.xmax)
        return self
class PurityAndEfficiencyOverCut(Plotter):
    """
    Plots the purity and the efficiency over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed selects efficiency and purity (True) or true and false positives (False)
        """
        masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
        hists = histogram.Histograms(data, column, masks, weight_column=weight_column)

        # NOTE(review): the branch lines were missing from the listing this block
        # was restored from; `normed` is the only candidate condition - confirm.
        if normed:
            efficiency, efficiency_error = hists.get_efficiency(['Signal'])
            purity, purity_error = hists.get_purity(['Signal'], ['Background'])
            first_label, second_label = "Efficiency", "Purity"
        else:
            efficiency, efficiency_error = hists.get_true_positives(['Signal'])
            purity, purity_error = hists.get_false_positives(['Background'])
            first_label, second_label = "True positive", "False positive"

        cuts = hists.bin_centers

        # Widen the axis limits so that they contain the new curves
        self.xmin = numpy.nanmin([numpy.nanmin(cuts), self.xmin])
        self.xmax = numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        self.ymin = numpy.nanmin([numpy.nanmin(efficiency), numpy.nanmin(purity), self.ymin])
        self.ymax = numpy.nanmax([numpy.nanmax(efficiency), numpy.nanmax(purity), self.ymax])

        self.plots.append(self._plot_datapoints(self.axis, cuts, efficiency, xerr=0, yerr=efficiency_error))
        self.labels.append(first_label)

        self.plots.append(self._plot_datapoints(self.axis, cuts, purity, xerr=0, yerr=purity_error))
        self.labels.append(second_label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Classification Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        handles = [plot[0] for plot in self.plots]
        self.axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class SignalToNoiseOverCut(Plotter):
    """
    Plots the signal to noise ratio over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate signal to noise ratio for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed not used by the visible code of this plotter
        """
        masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
        hists = histogram.Histograms(data, column, masks, weight_column=weight_column)

        signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])
        cuts = hists.bin_centers

        # Widen the axis limits so that they contain the new curve
        self.xmin = numpy.nanmin([numpy.nanmin(cuts), self.xmin])
        self.xmax = numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        self.ymin = numpy.nanmin([numpy.nanmin(signal2noise), self.ymin])
        self.ymax = numpy.nanmax([numpy.nanmax(signal2noise), self.ymax])

        curve = self._plot_datapoints(self.axis, cuts, signal2noise, xerr=0, yerr=signal2noise_error)
        self.plots.append(curve)
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Signal to Noise Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        handles = [plot[0] for plot in self.plots]
        self.axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class PurityOverEfficiency(Plotter):
    """
    Plots the purity over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label label for the legend, if None the column name is used
        """
        masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
        hists = histogram.Histograms(data, column, masks, weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        purity, purity_error = hists.get_purity(['Signal'], ['Background'])

        # Widen the axis limits so that they contain the new curve
        self.xmin = numpy.nanmin([efficiency.min(), self.xmin])
        self.xmax = numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin = numpy.nanmin([numpy.nanmin(purity), self.ymin])
        self.ymax = numpy.nanmax([numpy.nanmax(purity), self.ymax])

        curve = self._plot_datapoints(self.axis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
        self.plots.append(curve)
        self.labels.append(column if label is None else label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("ROC Purity Plot")
        self.axis.get_xaxis().set_label_text('Efficiency')
        self.axis.get_yaxis().set_label_text('Purity')
        handles = [plot[0] for plot in self.plots]
        self.axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class RejectionOverEfficiency(Plotter):
    """
    Plots the rejection over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label prefix for the legend entry (only its first 10 characters are used),
               if None the legend entry only shows the area under the curve
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        # The background rejection is the complement of the background efficiency
        rejection, rejection_error = hists.get_efficiency(['Background'])
        rejection = 1 - rejection

        self.xmin, self.xmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([rejection.min(), self.ymin]), numpy.nanmax([rejection.max(), self.ymax])

        # numpy.trapz is deprecated since NumPy 2.0 in favour of numpy.trapezoid;
        # fall back to the old name on older NumPy releases.
        integrate = getattr(numpy, 'trapezoid', None) or numpy.trapz
        # abs() because the sign of the integral depends on the ordering of the efficiency values
        auc = numpy.abs(integrate(rejection, efficiency))

        p = self._plot_datapoints(self.axis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
        self.plots.append(p)
        if label is not None:
            self.labels.append(label[:10] + r"$\ {\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        else:
            self.labels.append(r"${\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.get_xaxis().set_tick_params(labelsize=60)
        self.axis.get_yaxis().set_tick_params(labelsize=60)
        self.axis.get_xaxis().labelpad = 20
        self.axis.get_yaxis().labelpad = 20
        self.axis.get_xaxis().set_label_text(r'${\rm Signal\ Efficiency}$', fontsize=65)
        self.axis.get_yaxis().set_label_text(r'${\rm Background\ Rejection}$', fontsize=65)
        # loc=3 places the legend in the lower left corner
        self.axis.legend([x[0] for x in self.plots], self.labels, fancybox=True, framealpha=0.5, fontsize=60, loc=3)
        return self
class Multiplot(Plotter):
    """
    Plots multiple other plots into a grid 3x?
    """

    def __init__(self, cls, number_of_plots, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param cls Plotter class which is instantiated once per grid cell
        @param number_of_plots number of sub-plots in the grid
        @param figure default draw figure which is used
        """
        if figure is not None:
            self.figure = figure
        else:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)

        # Up to three plots share a single row which is exactly as wide as
        # needed; more plots are wrapped into rows of three.
        if number_of_plots in (1, 2, 3):
            rows, columns = 1, number_of_plots
        else:
            rows, columns = int(numpy.ceil(number_of_plots / 3)), 3
        gs = matplotlib.gridspec.GridSpec(rows, columns)

        self.sub_plots = []
        for index in range(number_of_plots):
            cell_axis = self.figure.add_subplot(gs[index // 3, index % 3])
            self.sub_plots.append(cls(self.figure, cell_axis))
        self.axis = self.sub_plots[0].axis
        super().__init__(self.figure, self.axis)

    def add(self, i, *args, **kwargs):
        """
        Call add function of ith subplot
        @param i position of the subplot
        """
        self.sub_plots[i].add(*args, **kwargs)

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        # NOTE(review): the loop body was missing from the listing this block
        # was restored from; delegating to each sub-plot is assumed - confirm.
        for plot in self.sub_plots:
            plot.finish()
        return self
class Diagonal(Plotter):
    """
    Plots the purity in each bin over the classifier output.
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None):
        """
        Add a new curve to the Diagonal plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        """
        masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
        hists = histogram.Histograms(data, column, masks, weight_column=weight_column)
        purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])

        centers = hists.bin_centers
        self.xmin = min(centers.min(), self.xmin)
        self.xmax = max(centers.max(), self.xmax)
        # The y-range is fixed to [0, 1] instead of following the data
        self.ymin, self.ymax = 0, 1

        curve = self._plot_datapoints(self.axis, centers, purity, xerr=hists.bin_widths / 2.0, yerr=purity_error)
        self.plots.append(curve)
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        # Reference diagonal from (0, 0) to (1, 1)
        self.axis.plot((0.0, 1.0), (0.0, 1.0), color='black')
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Diagonal Plot")
        self.axis.get_xaxis().set_label_text('Classifier Output')
        self.axis.get_yaxis().set_label_text('Purity Per Bin')
        handles = [plot[0] for plot in self.plots]
        self.axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class Distribution(Plotter):
    """
    Plots distribution of a quantity
    """

    def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
                 keep_first_binning=False, range_in_std=None, logScale=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed_to_all_entries true if histograms should be normed to the sum of their entries before drawing
        @param normed_to_bin_width true if histograms should be divided by the bin widths before drawing
        @param keep_first_binning use the binning of the first distribution for further plots
        @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
        @param logScale true if the y-axis should use a logarithmic scale
        """
        super().__init__(figure, axis)
        self.normed_to_all_entries = normed_to_all_entries
        self.normed_to_bin_width = normed_to_bin_width
        self.range_in_std = range_in_std
        # Start with an "empty" range so that the first add() defines the limits
        # (ymin keeps the lower bound set by the base class).
        self.ymax = float('-inf')
        self.xmin = float('inf')
        self.xmax = float('-inf')
        self.keep_first_binning = keep_first_binning
        # Binning of the first added distribution, reused if keep_first_binning is set
        self.first_binning = None
        self.x_axis_label = ''
        # Honour the constructor argument (the visible code always stored False);
        # callers may still toggle the attribute afterwards.
        self.logScale = bool(logScale)

    def add(self, data, column, mask=None, weight_column=None, label=None, bins=50):
        """
        Add a new distribution to the plots
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param mask boolean numpy.array defining which events are used for the histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend, if None the column name is used
        @param bins binning passed on to histogram.Histograms
        """
        if mask is None:
            mask = numpy.ones(len(data)).astype('bool')

        if self.keep_first_binning and self.first_binning is not None:
            bins = self.first_binning
        hists = histogram.Histograms(data, column, {'Total': mask}, weight_column=weight_column,
                                     bins=bins, equal_frequency=False, range_in_std=self.range_in_std)
        if self.keep_first_binning and self.first_binning is None:
            self.first_binning = hists.bins
        hist, hist_error = hists.get_hist('Total')
        # Width of the second bin; shown in the y-axis label by finish()
        self.binWidth = hists.bin_widths[1]

        if self.normed_to_all_entries:
            normalization = float(numpy.sum(hist))
            hist = hist / normalization
            hist_error = hist_error / normalization

        if self.normed_to_bin_width:
            hist = hist / hists.bin_widths
            hist_error = hist_error / hists.bin_widths

        self.xmin, self.xmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmax)
        self.ymin, self.ymax = numpy.nanmin([hist.min(), self.ymin]), numpy.nanmax([(hist + hist_error).max(), self.ymax])

        p = self._plot_datapoints(self.axis, hists.bin_centers, hist, xerr=hists.bin_widths / 2, yerr=hist_error)
        self.plots.append(p)
        self.x_axis_label = column
        if label is None:
            self.labels.append(column)
        else:
            self.labels.append(label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.axis.set_xlim((self.xmin, self.xmax))

        # NOTE(review): the line between the two statements below was missing
        # from the listing this block was restored from; an `else` is assumed.
        if self.logScale:
            try:
                self.axis.set_yscale('log', nonpositive='clip')
            except (TypeError, ValueError):
                # Matplotlib < 3.3 only knows the old keyword name
                self.axis.set_yscale('log', nonposy='clip')
        else:
            self.axis.set_ylim((self.ymin, self.ymax))

        # Keep self.binWidth numeric so that finish() can be called repeatedly
        bin_width_text = '{:8.2f}'.format(self.binWidth)

        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        if self.normed_to_all_entries and self.normed_to_bin_width:
            self.axis.get_yaxis().set_label_text(r'# Entries per Bin / (# Entries * Bin Width)')
        elif self.normed_to_all_entries:
            self.axis.get_yaxis().set_label_text(
                r'{$\frac{\rm Entries\hspace{0.25em} per\hspace{0.25em} Bin}{\rm Entries}\, /\, (' +
                bin_width_text + r'\,)$}', fontsize=65)
            self.axis.get_yaxis().labelpad = 20
            self.axis.get_yaxis().set_tick_params(labelsize=60)
        elif self.normed_to_bin_width:
            self.axis.get_yaxis().set_label_text(r'# Entries per Bin / Bin Width')
        else:
            self.axis.get_yaxis().set_label_text(r'# Entries per Bin')
        return self
# NOTE(review): the class statement was missing from the listing this block was
# restored from; the name `Box` and the `Plotter` base are assumed - confirm.
class Box(Plotter):
    """
    Draws a horizontal boxplot of a quantity
    """

    def __init__(self, figure=None, axis=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        """
        super().__init__(figure=figure, axis=axis)
        # Label of the x-axis, set to the column name by add()
        self.x_axis_label = ""

    def add(self, data, column, mask=None, weight_column=None):
        """
        Add a new boxplot to the plots
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate boxplot quantities
        @param mask boolean numpy.array defining which events are used for the histogram
        @param weight_column column in data containing the weights for each event
        """
        if mask is None:
            mask = numpy.ones(len(data)).astype('bool')
        x = data[column][mask]
        if weight_column is not None:
            B2WARNING("Weights are currently not used in boxplot, due to limitations in matplotlib")

        # NOTE(review): the guard line was missing from the listing; an
        # emptiness check is assumed from the warning text - confirm.
        if len(x) == 0:
            B2WARNING("Ignore empty boxplot.")
            return self

        box_style = dict(facecolor='blue', alpha=0.5)
        p = self.axis.boxplot(x, sym='k.', whis=1.5, vert=False, patch_artist=True, showmeans=True, widths=1,
                              boxprops=box_style,
                              )
        # NOTE(review): the statement storing the handle was missing from the
        # listing; the first box patch is assumed to be the legend handle.
        self.plots.append(p['boxes'][0])
        self.labels.append(column)
        self.x_axis_label = column
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        # The y-axis carries no information for a single horizontal box
        matplotlib.artist.setp(self.axis.get_yaxis(), visible=False)
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_title("Box Plot")
        return self
class Difference(Plotter):
    """
    Plots the difference between two histograms
    """

    def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed normalize minuend and subtrahend before comparing them
        @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
        """
        super().__init__(figure, axis)
        self.normed = normed
        self.shift_to_zero = shift_to_zero
        # NOTE(review): the initial y-range was not visible in the listing this
        # block was restored from; a narrow window around zero is assumed - confirm.
        if self.normed:
            self.ymin, self.ymax = -0.01, 0.01
        else:
            self.ymin, self.ymax = -1, 1

    def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None, bins=50):
        """
        Add a new difference plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
        @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend if None, the column name is used
        @param bins binning passed on to histogram.Histograms
        """
        hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask}, bins=bins,
                                     weight_column=weight_column, equal_frequency=False)
        minuend, minuend_error = hists.get_hist('Minuend')
        subtrahend, subtrahend_error = hists.get_hist('Subtrahend')

        # NOTE(review): the definition of difference_error and the `normed`
        # guard were missing from the listing; the errors are assumed to be
        # added in quadrature - confirm.
        difference_error = numpy.sqrt(minuend_error**2 + subtrahend_error**2)
        if self.normed:
            difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
            minuend = minuend / numpy.sum(minuend)
            subtrahend = subtrahend / numpy.sum(subtrahend)
        difference = minuend - subtrahend

        if self.shift_to_zero:
            difference = difference - numpy.mean(difference)

        self.xmin, self.xmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmax)
        self.ymin = min((difference - difference_error).min(), self.ymin)
        self.ymax = max((difference + difference_error).max(), self.ymax)

        p = self._plot_datapoints(self.axis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
        self.plots.append(p)
        # As documented: fall back to the column name when no label is given
        if label is not None:
            self.labels.append(label)
        else:
            self.labels.append(column)
        self.x_axis_label = column
        return self

    def finish(self, line_color='black'):
        """
        Sets limits, title, axis-labels and legend of the plot
        @param line_color color of the horizontal zero line
        """
        # Horizontal reference line at zero difference
        self.axis.plot((self.xmin, self.xmax), (0, 0), color=line_color, linewidth=4)
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Difference Plot")
        self.axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_ylabel(r'{\rm Difference}', fontsize=40, labelpad=20)
        self.axis.get_xaxis().grid(True)
        return self
class normalizedResiduals(Plotter):
    """
    Plots the normalized residuals (difference divided by its uncertainty) between two histograms
    """

    def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed normalize minuend and subtrahend before comparing them
        @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
        """
        super().__init__(figure, axis)
        self.normed = normed
        self.shift_to_zero = shift_to_zero
        # NOTE(review): the initial y-range was not visible in the listing this
        # block was restored from; the values below are assumed - confirm.
        if self.normed:
            self.ymin, self.ymax = -0.01, 0.01
        else:
            self.ymin, self.ymax = -1, 1

    def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None, bins=50, isNN=False):
        """
        Add a new difference plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
        @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend if None, the column name is used
        @param bins binning passed on to histogram.Histograms
        @param isNN presumably marks classifiers whose output starts at -1 - see the note below
        """
        masks = {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask}
        hists = histogram.Histograms(data, column, masks, bins=bins,
                                     weight_column=weight_column, equal_frequency=False)
        minuend, minuend_error = hists.get_hist('Minuend')
        subtrahend, subtrahend_error = hists.get_hist('Subtrahend')

        print("Here BinWidths Norm", hists.bin_widths)

        # Both histograms are normalised to unit sum; the relative errors are added in quadrature
        minuend_sum = numpy.sum(minuend)
        subtrahend_sum = numpy.sum(subtrahend)
        difference_error = numpy.sqrt((minuend_error / minuend_sum)**2 + (subtrahend_error / subtrahend_sum)**2)
        minuend = minuend / minuend_sum
        subtrahend = subtrahend / numpy.sum(subtrahend)
        difference = minuend - subtrahend
        normalizedRes = (minuend - subtrahend) / difference_error

        if self.shift_to_zero:
            # Only `difference` is shifted; the plotted residuals are not affected
            difference = difference - numpy.mean(difference)

        # NOTE(review): the guard in front of this assignment was missing from
        # the listing this block was restored from; `isNN` is the only unused
        # parameter and is assumed to be the condition - confirm.
        if isNN:
            self.xmin = float(-1.0)

        residual_errors = 1  # normalized residuals have unit uncertainty by construction
        p = self._plot_datapoints(self.axis, hists.bin_centers, normalizedRes,
                                  xerr=hists.bin_widths / 2, yerr=residual_errors)
        self.plots.append(p)
        self.labels.append(column if label is None else label)
        self.x_axis_label = column
        return self

    def finish(self, line_color='black'):
        """
        Sets limits, title, axis-labels and legend of the plot
        @param line_color accepted for compatibility with Difference.finish, not used by the visible code
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((-5, 5))
        self.axis.set_title("Difference Plot")
        y_axis = self.axis.get_yaxis()
        y_axis.set_major_locator(matplotlib.ticker.MaxNLocator(5))
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_ylabel(r'${\rm Normalized}$' + '\n' + r'${\rm Residuals}$', fontsize=40, labelpad=20)
        # Ticks at every integer, but only the even ones are labelled
        y_axis.set_ticks([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
        y_axis.set_ticklabels([r'', r'$-4$', r'', r'$-2$', r'', r'$0$', r'', r'$2$', r'', r'$4$', r''], fontsize=45)
        self.axis.get_xaxis().grid(True)

        # Guide lines at +-3 (green) and +-1 (blue) standard deviations
        x_range = (self.xmin, self.xmax)
        self.axis.plot(x_range, (3, 3), linewidth=4, color='#006600', linestyle='-')
        self.axis.plot(x_range, (1, 1), linewidth=4, color='b', linestyle='-')
        self.axis.plot(x_range, (-1, -1), linewidth=4, color='b', linestyle='-')
        self.axis.plot(x_range, (-3, -3), linewidth=4, color='#006600', linestyle='-')
        return self
926 class Overtraining(Plotter):
928 Create TMVA-like overtraining control plot for a classification training
940 def __init__(self, figure=None):
942 Creates a new figure if None is given, sets the default plot parameters
943 @param figure default draw figure which is used
946 self.figure = matplotlib.figure.Figure(figsize=(32, 18))
947 self.figure.set_tight_layout(
True)
951 gs = matplotlib.gridspec.GridSpec(5, 1)
952 self.axis = self.figure.add_subplot(gs[:3, :])
953 self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
954 self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)
956 super().__init__(self.figure, self.axis)
958 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None, bkgrOutput=0, isNN=False):
960 Add a new overtraining plot, I recommend to raw only one overtraining plot at the time,
961 otherwise there are too many curves in the plot to recognize anything in the plot.
962 @param data pandas.DataFrame containing all data
963 @param column which is used to calculate distribution histogram
964 @param train_mask boolean numpy.array defining which events are training events
965 @param test_mask boolean numpy.array defining which events are test events
966 @param signal_mask boolean numpy.array defining which events are signal events
967 @param bckgrd_mask boolean numpy.array defining which events are background events
968 @param weight_column column in data containing the weights for each event
970 distribution = Distribution(self.figure, self.axis, normed_to_all_entries=
True)
975 bins = list(range(-51, 55, 1))
976 for i
in range(0, len(bins)):
977 bins[i] = float(bins[i]) / 25
979 bins = list(range(-51, 55, 1))
980 for i
in range(0, len(bins)):
981 bins[i] = float(bins[i]) / 50
984 distribution.logScale =
True
985 distribution.labels = [
r'{\rm Test-Bkgr.}',
r'{\rm Train-Bkgr.}',
r'{\rm Test-Signal}',
r'{\rm Train-Signal}']
987 distribution.labels = [
988 r'{\rm Test-$\bar{B}^{0}$}',
989 r'{\rm Train-$\bar{B}^{0}$}',
990 r'{\rm Test-$B^{0}$}',
991 r'{\rm Train-$B^{0}$}']
993 distribution.set_plot_options(self.plot_kwargs)
995 distribution.set_errorbar_options({
'fmt':
'o',
'elinewidth': 5,
'alpha': 1,
'markersize': 20,
'ecolor':
'w'})
997 distribution.set_errorband_options(
None)
998 distribution.add(data, column, test_mask & bckgrd_mask, weight_column,
None, bins)
999 distribution.add(data, column, test_mask & signal_mask, weight_column,
None, bins)
1001 distribution.set_errorbar_options(
None)
1003 distribution.set_plot_options({
'color': distribution.plots[0][0][0].get_color(
1004 ),
'drawstyle':
'steps-mid',
'linestyle':
'dashed',
'lw': 5})
1005 distribution.set_fill_options(
None)
1006 distribution.add(data, column, train_mask & bckgrd_mask, weight_column,
None, bins)
1007 distribution.set_plot_options({
'color': distribution.plots[1][0][0].get_color(
1008 ),
'drawstyle':
'steps-mid',
'linestyle':
'solid',
'lw': 5})
1010 distribution.add(data, column, train_mask & signal_mask, weight_column,
None, bins)
1012 distribution.finish()
1014 p1 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1016 fmt=
'o', mfc=distribution.plots[0][0][0].get_color(),
1017 mec=distribution.plots[0][0][0].get_color(), markersize=20, label=
r'${\rm Test-Bkgr.}$')
1018 p2, = distribution.axis.plot([], label=
r'${\rm Train-Bkgr.}$', linewidth=5,
1019 linestyle=
'dashed', c=distribution.plots[0][0][0].get_color())
1020 p3 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1022 fmt=
'o', mfc=distribution.plots[1][0][0].get_color(),
1023 mec=distribution.plots[1][0][0].get_color(), markersize=20, label=
r'${\rm Test-Signal}$')
1024 p4, = distribution.axis.plot([], label=
r'${\rm Train-Signal}$', linewidth=5,
1025 linestyle=
'solid', alpha=0.9, c=distribution.plots[1][0][0].get_color())
1027 distribution.axis.legend([p1, p2, p3, p4], distribution.labels, loc=
'best', fancybox=
True, framealpha=0.5, fontsize=60)
1029 self.plot_kwargs[
'color'] = distribution.plots[0][0][0].get_color()
1030 difference_bckgrd = normalizedResiduals(self.figure, self.axis_d1, shift_to_zero=
True, normed=
True)
1031 difference_bckgrd.set_plot_options(self.plot_kwargs)
1032 difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
1033 difference_bckgrd.set_errorband_options(self.errorband_kwargs)
1034 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column,
None, bins, isNN)
1035 self.axis_d1.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
1036 self.axis_d1.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
1038 difference_bckgrd.plots = difference_bckgrd.labels = []
1039 difference_bckgrd.finish(line_color=distribution.plots[0][0][0].get_color())
1041 self.plot_kwargs[
'color'] = distribution.plots[1][0][0].get_color()
1042 difference_signal = normalizedResiduals(self.figure, self.axis_d2, shift_to_zero=
True, normed=
True)
1043 difference_signal.set_plot_options(self.plot_kwargs)
1044 difference_signal.set_errorbar_options(self.errorbar_kwargs)
1045 difference_signal.set_errorband_options(self.errorband_kwargs)
1046 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column,
None, bins, isNN)
1047 self.axis_d2.set_xlim((difference_signal.xmin, difference_signal.xmax))
1048 self.axis_d2.set_ylim((difference_signal.ymin, difference_signal.ymax))
1049 difference_signal.plots = difference_signal.labels = []
1050 difference_signal.finish(line_color=distribution.plots[1][0][0].get_color())
def finish(self, xLabel=r'${\rm Classifier\ Output}$'):
    """
    Sets limits, title, axis-labels and legend of the plot
    @param xLabel text of the x-axis label; it is drawn on the lowest panel (axis_d2) only
    @return self, so that calls can be chained
    """
    # The two residual panels get no title of their own.
    for residual_axis in (self.axis_d1, self.axis_d2):
        residual_axis.set_title("")

    # Hide the x tick labels and the x-axis label on every panel except the
    # lowest one.
    for upper_axis in (self.axis, self.axis_d1):
        matplotlib.artist.setp(upper_axis.get_xticklabels(), visible=False)
    for upper_axis in (self.axis, self.axis_d1):
        upper_axis.get_xaxis().set_label_text('')

    bottom_xaxis = self.axis_d2.get_xaxis()
    bottom_xaxis.set_label_text(xLabel, fontsize=85)
    bottom_xaxis.labelpad = 20
    bottom_xaxis.set_tick_params(labelsize=60)
    return self
class VerboseDistribution(Plotter):
    """
    Plots distribution of a quantity including boxplots
    """

    def __init__(self, figure=None, axis=None, normed=False, range_in_std=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed true if the histograms should be normed before drawing
        @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
        """
        super().__init__(figure, axis)
        #: true if the histograms are normed before drawing
        self.normed = normed
        #: half-width of the shown window in units of the standard deviation (None: no restriction)
        self.range_in_std = range_in_std
        #: axes of the box plots, one entry per add() call
        self.box_axes = []
        #: plotter drawing the histograms into the main axis
        self.distribution = Distribution(self.figure, self.axis, normed_to_all_entries=self.normed,
                                         range_in_std=self.range_in_std)

    def add(self, data, column, mask=None, weight_column=None, label=None):
        """
        Add a new distribution plot, with additional information like a boxplot compared to
        the ordinary Distribution plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param mask boolean numpy.array defining which events are used for the distribution histogram
        @param weight_column column in data containing the weights for each event
        @param label label of the distribution, forwarded to the Distribution plotter
        @return self, so that calls can be chained
        """
        self.distribution.set_plot_options(self.plot_kwargs)
        self.distribution.set_errorbar_options(self.errorbar_kwargs)
        self.distribution.set_errorband_options(self.errorband_kwargs)
        self.distribution.add(data, column, mask, weight_column, label=label)

        # One more box plot than before: the histogram keeps the upper three
        # quarters of the figure, the box plots share the lowest quarter.
        n = len(self.box_axes) + 1
        gs = matplotlib.gridspec.GridSpec(4 * n, 1)
        gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i in range(n)]
        box_axis = self.add_subplot(gridspecs)

        if self.range_in_std is not None:
            # NOTE(review): the statement computing mean and std was missing from the
            # extracted listing; histogram.weighted_mean_and_std(x, w) is assumed - confirm.
            weights = None if weight_column is None else data[weight_column]
            mean, std = histogram.weighted_mean_and_std(data[column], weights)
            if mask is None:
                # Without an explicit mask every event is a candidate for the window cut.
                mask = numpy.ones(len(data[column]), dtype=bool)
            lower = mean - self.range_in_std * std
            upper = mean + self.range_in_std * std
            mask = mask & (data[column] > lower) & (data[column] < upper)

        box = Box(self.figure, box_axis)
        box.add(data, column, mask, weight_column)
        if len(box.plots) > 0:
            # Give the box the colour of the histogram that was just drawn.
            box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())

        self.box_axes.append(box_axis)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        self.distribution.finish()
        matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
        self.axis.get_xaxis().set_label_text('')

        # Only the last box plot keeps its x tick labels and x-axis label.
        for box_axis in self.box_axes[:-1]:
            matplotlib.artist.setp(box_axis.get_xticklabels(), visible=False)
            box_axis.set_title("")
            box_axis.get_xaxis().set_label_text('')
        if self.box_axes:
            self.box_axes[-1].set_title("")

        self.axis.set_title("Distribution Plot")
        self.axis.legend([x[0] for x in self.distribution.plots], self.distribution.labels,
                         loc='best', fancybox=True, framealpha=0.5)
        return self
class Correlation(Plotter):
    """
    Plots change of a distribution of a quantity depending on the cut on a classifier
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            #: figure the three panels are drawn into
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        gs = matplotlib.gridspec.GridSpec(3, 2)
        #: upper panel, filled from the union of signal and background mask
        self.axis = self.figure.add_subplot(gs[0, :])
        #: middle panel, filled from the signal mask
        self.axis_d1 = self.figure.add_subplot(gs[1, :], sharex=self.axis)
        #: lower panel, filled from the background mask
        self.axis_d2 = self.figure.add_subplot(gs[2, :], sharex=self.axis)

        super().__init__(self.figure, self.axis)

    def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param cut_column which is used to calculate cut on the other quantity defined by column
        @param quantiles list of quantiles between 0 and 100, defining the different cuts;
               currently not used, the cuts are fixed to the quantiles 5, 10, ..., 95
        @param signal_mask boolean mask selecting the events of the middle panel
        @param bckgrd_mask boolean mask selecting the events of the lower panel
        @param weight_column column in data containing the weights for each event
        @return self, so that calls can be chained
        """
        # NOTE(review): the statement computing the flatness score was missing from the
        # extracted listing; basf2_mva_util.calculate_flatness(f, p, w) is assumed - confirm.
        import basf2_mva_util

        if len(data[cut_column]) == 0:
            B2WARNING("Ignore empty Correlation.")
            return self

        colormap = plt.get_cmap('coolwarm')
        panels = [
            (self.axis, '.', signal_mask | bckgrd_mask),
            (self.axis_d1, 'S', signal_mask),
            (self.axis_d2, 'B', bckgrd_mask),
        ]

        for axis, tag, mask in panels:
            values = data[column][mask]
            cut_values = data[cut_column][mask]

            if weight_column is not None:
                weights = numpy.array(data[weight_column][mask])
            else:
                weights = numpy.ones(len(values))

            # Histogram only the central 90% of the values.
            value_range = np.percentile(values, [5, 95]).astype(np.float32)

            # density=True replaces the removed normed=True; both give the same
            # result for the equal-width bins used here.
            reference, edges = np.histogram(values, bins=100, range=value_range,
                                            density=True, weights=weights)
            # The bin edges are the same for every histogram below, so the
            # centres are computed once.
            bin_center = ((edges + np.roll(edges, 1)) / 2)[1:]
            axis.plot(bin_center, reference, color='black', lw=1)

            # NOTE(review): advancing the lower edge of the band after every quantile is
            # inferred (that statement was missing from the listing) - confirm.
            previous = reference
            for quantil in np.arange(5, 100, 5):
                cut = np.percentile(cut_values, quantil)
                sel = numpy.array(cut_values >= cut)
                y, _ = np.histogram(values[sel], bins=100, range=value_range,
                                    density=True, weights=weights[sel])
                axis.fill_between(bin_center, previous, y, color=colormap(quantil / 100.0))
                previous = y

            axis.set_ylim(bottom=0)

            flatness_score = basf2_mva_util.calculate_flatness(values, cut_values, weights)
            axis.set_title(
                r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(tag, flatness_score))

        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        return self
class TSNE(Plotter):
    """
    Plots multivariate distribution using TSNE algorithm
    """

    def add(self, data, columns, *masks):
        """
        Add a new TSNE plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used as input features of the embedding
        @param masks different classes to show in TSNE, one scatter series is drawn per mask
        @return self, so that calls can be chained
        """
        try:
            import sklearn.manifold
        except ImportError:
            print("Cannot create TSNE plot. Install sklearn if you want it")
            return self

        features = numpy.array([data[column] for column in columns]).T

        # sklearn's TSNE offers no transform() for data it was not fitted on, so
        # all events are embedded once and each class is selected from the
        # shared embedding afterwards.
        model = sklearn.manifold.TSNE(n_components=2, random_state=0)
        embedding = model.fit_transform(features)

        for mask in masks:
            selected = embedding[numpy.array(mask, dtype=bool)]
            self.axis.scatter(selected[:, 0], selected[:, 1])
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        return self
class Importance(Plotter):
    """
    Plots importance matrix
    """

    def add(self, data, columns, variables, displayHeatMap):
        """
        Add a new importance plot.
        @param data pandas.DataFrame containing all data
        @param columns columns of data holding the importances; each one becomes a heat-map column
        @param variables labels of the rows of data, used as y tick labels
        @param displayHeatMap NOTE(review): no use of this flag is visible in the extracted
               listing, so it is accepted but ignored here - confirm against the full file
        @return self, so that calls can be chained
        """
        self.figure.set_tight_layout(True)

        def norm(x):
            """Rescales x linearly to the range 0..100 (all zeros for a constant input)."""
            width = (numpy.max(x) - numpy.min(x))
            if width <= 0:
                return numpy.zeros(x.shape)
            return (x - numpy.min(x)) / width * 100

        importance_matrix = numpy.vstack([norm(data[column]) for column in columns]).T
        n_rows, n_columns = importance_matrix.shape

        cRdBu = plt.get_cmap('RdBu')
        new_RdBu = truncate_colormap(cRdBu, 0.5, 0.85)

        # [value, label] pairs in ascending order of importance.
        # NOTE(review): labels and cell texts only line up with the heat map for a
        # single importance column - confirm that this is the only use case.
        labels = list(variables)
        ranked = sorted([importance_matrix[y, x], labels[y]]
                        for y in range(n_rows) for x in range(n_columns))
        ranked_labels = [label for _, label in ranked]

        # Every heat-map column is sorted on its own, lowest importance first.
        sorted_matrix = np.array(np.sort(importance_matrix, axis=0))
        importance_heatmap = self.axis.pcolor(sorted_matrix, cmap=new_RdBu, vmin=0, vmax=100)

        # Put the major ticks at the middle of each cell.
        self.axis.set_yticks(numpy.arange(n_rows) + 0.5, minor=False)
        self.axis.set_xticks(numpy.arange(n_columns) + 0.5, minor=False)

        self.axis.set_xticklabels(columns, minor=False, rotation=90)

        if len(ranked) < 6:
            # Few entries: use a big font.
            self.axis.set_yticklabels(ranked_labels, minor=False, size=58)
        else:
            self.axis.set_yticklabels(ranked_labels, minor=False)

        # The column names are replaced by an empty label again.
        self.axis.set_xticklabels([''])

        # The values are written into the last heat-map column (previously this
        # relied on a loop variable leaking out of an earlier loop).
        text_x = (n_columns - 1) + 0.5
        for y, (value, _) in enumerate(ranked):
            self.axis.text(text_x, y + 0.5, r'$%.0f$' % float(value),
                           horizontalalignment='center',
                           verticalalignment='center')

        cb = self.figure.colorbar(importance_heatmap, ticks=[2, 98], orientation='vertical')
        cb.ax.tick_params(length=0)
        cb.ax.set_yticklabels([r'${\rm low}$', r'${\rm high}$'], size=60)

        self.axis.set_aspect('equal')

        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        return self
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """
    Creates a colormap which covers only a part of the given colormap
    @param cmap colormap to take the colors from
    @param minval position in cmap (between 0 and 1) at which the new colormap starts
    @param maxval position in cmap (between 0 and 1) at which the new colormap ends
    @param n number of colors sampled from cmap between minval and maxval
    @return the new matplotlib.colors.LinearSegmentedColormap
    """
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    new_name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(new_name, sampled_colors)
    # Callers pass the result straight to pcolor(cmap=...), so it has to be returned.
    return new_cmap
class CorrelationMatrix(Plotter):
    """
    Plots correlation matrix
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            #: figure both matrices and the colorbar are drawn into
            self.figure = matplotlib.figure.Figure(figsize=(38, 24))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        gs = matplotlib.gridspec.GridSpec(16, 2)
        #: left panel, shows the correlations of the events selected by signal_mask
        self.signal_axis = self.figure.add_subplot(gs[:14, 0])
        #: right panel, shows the correlations of the events selected by bckgrd_mask
        self.bckgrd_axis = self.figure.add_subplot(gs[:14, 1], sharey=self.signal_axis)
        #: axis of the horizontal colorbar below both panels
        self.colorbar_axis = self.figure.add_subplot(gs[15, :])
        #: the signal panel doubles as the main axis of the Plotter base class
        self.axis = self.signal_axis

        super().__init__(self.figure, self.axis)

    @staticmethod
    def _mirrored_correlation(data, columns, mask):
        """
        Calculates the correlation matrix in percent with reversed column order
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param mask boolean mask selecting the events which are used
        """
        selected = numpy.vstack([data[column][mask] for column in columns])
        # corrcoef returns a 0-d result for a single column; atleast_2d keeps the
        # matrix two-dimensional so that the shape accesses below work.
        corr = numpy.atleast_2d(numpy.corrcoef(selected)) * 100
        return corr[:, ::-1].copy()

    @staticmethod
    def _cell_text(value, compact):
        """
        Formats one matrix element as rounded integer
        @param value correlation in percent
        @param compact if true the minus sign is written outside of the math environment
        """
        if '%.0f' % value == '-0':
            # Small negative values would be printed as "-0".
            value = 0
        if compact and value < 0:
            return '-' + r'$%.0f$' % abs(value)
        return r'$%.0f$' % value

    def _annotate(self, axis, corr):
        """
        Writes the value of every matrix element into the centre of its cell
        @param axis axis the matrix was drawn into
        @param corr matrix which was drawn
        """
        compact = corr.shape[0] > 24
        for y in range(corr.shape[0]):
            for x in range(corr.shape[1]):
                axis.text(x + 0.5, y + 0.5, self._cell_text(corr[y, x], compact),
                          horizontalalignment='center',
                          verticalalignment='center')

    def add(self, data, columns, signal_mask, bckgrd_mask, bkgrOutput):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param signal_mask boolean mask selecting the events of the left panel
        @param bckgrd_mask boolean mask selecting the events of the right panel
        @param bkgrOutput if -1 the panels are captioned with the B0 and anti-B0 labels,
               otherwise with Signal and Background
        @return self, so that calls can be chained
        """
        mirrored_signal_corr = self._mirrored_correlation(data, columns, signal_mask)
        mirrored_bckgrd_corr = self._mirrored_correlation(data, columns, bckgrd_mask)

        cRdBu = plt.get_cmap('RdBu')
        new_RdBu = truncate_colormap(cRdBu, 0.15, 0.85)
        signal_heatmap = self.signal_axis.pcolor(mirrored_signal_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)
        # NOTE(review): the heat map of the background panel was missing from the extracted
        # listing; it is restored to mirror the signal panel - confirm.
        self.bckgrd_axis.pcolor(mirrored_bckgrd_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)

        panels = ((self.signal_axis, mirrored_signal_corr), (self.bckgrd_axis, mirrored_bckgrd_corr))

        for axis, _ in panels:
            axis.invert_yaxis()
            axis.xaxis.tick_top()

        # Put the major ticks at the middle of each cell.
        for axis, corr in panels:
            axis.set_xticks(numpy.arange(corr.shape[0]) + 0.5, minor=False)
            axis.set_yticks(numpy.arange(corr.shape[1]) + 0.5, minor=False)

        # Matrices with fewer than 8 columns get a big font, otherwise the
        # matplotlib default size is kept. The x labels are reversed because the
        # matrix columns are mirrored.
        font = {'size': 58} if mirrored_signal_corr.shape[0] < 8 else {}
        for axis in (self.bckgrd_axis, self.signal_axis):
            axis.set_xticklabels(list(reversed(columns)), minor=False, rotation=90, **font)
            axis.set_yticklabels(columns, minor=False, **font)

        self._annotate(self.signal_axis, mirrored_signal_corr)
        # Every background value goes onto the background panel; negative values
        # of large matrices used to be written onto the signal panel by mistake.
        self._annotate(self.bckgrd_axis, mirrored_bckgrd_corr)

        cb = self.figure.colorbar(signal_heatmap, cax=self.colorbar_axis, ticks=[-92.3, 0, 92.5],
                                  orientation='horizontal')
        cb.ax.tick_params(length=0)
        cb.ax.set_xticklabels([r'${\rm negative}$', r'${\rm uncorrelated}$', r'${\rm positive}$'], fontsize=60)

        if bkgrOutput == -1:
            self.figure.text(0.30, 0.11, r'$B^0\,(q_{\rm MC} = +1)$', horizontalalignment='center', size=65)
            self.figure.text(0.74, 0.11, r'$\bar{B}^0\,(q_{\rm MC} = -1)$', horizontalalignment='center', size=65)
        else:
            self.figure.text(0.27, 0.115, r'${\rm Signal}$', horizontalalignment='center', size=65)
            self.figure.text(0.73, 0.115, r'${\rm Background}$', horizontalalignment='center', size=65)

        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        # The right panel is created with sharey=signal_axis; its y tick labels are hidden.
        matplotlib.artist.setp(self.bckgrd_axis.get_yticklabels(), visible=False)
        return self
if __name__ == '__main__':
    # Example script: draws every plot type once from randomly generated data.
    # NOTE(review): several statements of this script (imports, the value of N, the
    # construction of some plotters and the finish() calls) were missing from the
    # extracted listing and are inferred from the surrounding code - confirm.
    import pandas
    import seaborn

    def get_data(N, columns):
        """
        Creates fake data for example plots
        @param N total number of events; each of the two classes gets N // 2 of them
        @param columns names of the feature columns followed by the name of the target column
        @return pandas.DataFrame with randomly ordered rows
        """
        half = N // 2
        n = len(columns) - 1
        xs = numpy.random.normal(0, size=(half, n))
        xb = numpy.random.normal(1, size=(half, n))
        ys = numpy.zeros(half)
        yb = numpy.ones(half)
        data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
        return data.reindex(numpy.random.permutation(data.index))

    def draw(plotter, filename, *add_arguments):
        """
        Fills the plotter once per entry of add_arguments, finishes and saves it
        @param plotter plotter object which is filled
        @param filename name of the file the figure is saved to
        @param add_arguments tuples with the arguments passed to plotter.add after data
        """
        for arguments in add_arguments:
            plotter.add(data, *arguments)
        plotter.finish()
        plotter.save(filename)

    seaborn.set(font_scale=3)
    seaborn.set_style('whitegrid')

    N = 100000
    data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
    # The first half of the (shuffled) rows is the training sample, the rest
    # the test sample. Integer division is required: N / 2 is a float on
    # Python 3 and cannot be used as a slice bound.
    data['type'] = ['Train' if i < N // 2 else 'Test' for i in range(len(data))]

    is_signal = data['isSignal'] == 1
    is_bckgrd = data['isSignal'] == 0
    is_train = data['type'] == 'Train'
    is_test = data['type'] == 'Test'

    draw(Box(), 'box_plot.png', ('FastBDT',))

    draw(VerboseDistribution(), 'verbose_distribution_plot.png', ('FastBDT',), ('NeuroBayes',))

    draw(PurityOverEfficiency(), 'roc_purity_plot.png',
         ('FastBDT', is_signal, is_bckgrd), ('NeuroBayes', is_signal, is_bckgrd))

    draw(RejectionOverEfficiency(), 'roc_rejection_plot.png',
         ('FastBDT', is_signal, is_bckgrd), ('NeuroBayes', is_signal, is_bckgrd))

    draw(Diagonal(), 'diagonal_plot.png',
         ('FastBDT', is_signal, is_bckgrd), ('NeuroBayes', is_signal, is_bckgrd))

    draw(Distribution(), 'distribution_plot.png', ('FastBDT',), ('NeuroBayes',))

    # NOTE(review): normalizedResiduals is the difference plotter used elsewhere in this
    # file; the class originally constructed here is not visible - confirm.
    draw(normalizedResiduals(), 'difference_plot.png',
         ('FastBDT', is_train, is_test), ('NeuroBayes', is_train, is_test))

    draw(Overtraining(), 'overtraining_plot.png',
         ('FastBDT', is_train, is_test, is_signal, is_bckgrd))

    # Correlation.add combines both masks, so signal and background mask are
    # both passed.
    draw(Correlation(), 'correlation_plot.png',
         ('FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], is_signal, is_bckgrd))

    data['FastBDT2'] = data['FastBDT']**2
    data['NeuroBayes2'] = data['NeuroBayes']**2
    data['FastBDT3'] = data['FastBDT']**3
    data['NeuroBayes3'] = data['NeuroBayes']**3
    # CorrelationMatrix.add requires both masks and bkgrOutput; any value other
    # than -1 selects the Signal / Background captions.
    draw(CorrelationMatrix(), 'correlation_matrix.png',
         (['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'],
          is_signal, is_bckgrd, 0))
def calculate_flatness(f, p, w=None)
def weighted_mean_and_std(x, w)