# Global matplotlib style shared by every plotter defined in this module.
matplotlib.rcParams.update({'font.size': 40})
# All text is rendered through LaTeX; the raw-string labels below rely on it.
matplotlib.rcParams['text.usetex'] = True
# The preamble has to be a plain string: recent matplotlib releases reject
# (or silently stringify) a list of strings for this rcParam.
matplotlib.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"
20 import matplotlib.pyplot
as plt
21 import matplotlib.artist
22 import matplotlib.figure
23 import matplotlib.gridspec
24 import matplotlib.colors
25 import matplotlib.patches
26 import matplotlib.ticker
28 import basf2_mva_evaluation.histogram
as histogram
30 from basf2
import B2INFO, B2WARNING
class Plotter(object):
    """
    Base class for all Plotters.
    """

    # NOTE(review): the reviewed listing does not show where the two scale
    # factors read by scale_limits() are set; 0.0 is a neutral default that
    # leaves the limits untouched. Confirm the intended values.
    #: relative margin applied to the y limits by scale_limits
    yscale = 0.0
    #: relative margin applied to the x limits by scale_limits
    xscale = 0.0

    def __init__(self, figure=None, axis=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        """
        B2INFO("Create new figure for class " + str(type(self)))
        if figure is None:
            figure = matplotlib.figure.Figure(figsize=(32, 18))
            figure.set_tight_layout(False)
        #: figure which is used to draw
        self.figure = figure

        if axis is None:
            axis = self.figure.add_subplot(1, 1, 1)
        #: main axis which is used to draw
        self.axis = axis

        #: return values of _plot_datapoints for every curve added so far
        self.plots = []
        #: legend labels, one per entry in plots
        self.labels = []
        #: x limits
        self.xmin, self.xmax = float(0), float(1)
        #: y limits
        self.ymin, self.ymax = float(0), float(1)

        #: keyword arguments for the plot function
        self.plot_kwargs = None
        #: keyword arguments for the errorbar function
        self.errorbar_kwargs = None
        #: keyword arguments for the errorband (fill_between / Rectangle) drawing
        self.errorband_kwargs = None
        #: keyword arguments for the fill_between function
        self.fill_kwargs = None

        self.set_plot_options()
        self.set_errorbar_options()
        self.set_errorband_options()
        self.set_fill_options()

    def add_subplot(self, gridspecs):
        """
        Adds a new subplot to the figure, updates all other axes
        according to the given gridspec
        @param gridspecs gridspecs for all axes including the new one
        @return the newly created axis (shares its x axis with the main axis)
        """
        for gs, ax in zip(gridspecs[:-1], self.figure.axes):
            ax.set_position(gs.get_position(self.figure))
            ax.set_subplotspec(gs)
        return self.figure.add_subplot(gridspecs[-1], sharex=self.axis)

    def save(self, filename):
        """
        Save the figure into a file
        @param filename of the file
        @return self
        """
        B2INFO("Save figure for class " + str(type(self)))
        # Imported lazily so that the Agg backend is only pulled in when a figure is written.
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        canvas = FigureCanvas(self.figure)
        canvas.print_figure(filename, dpi=50)
        return self

    def set_plot_options(self, plot_kwargs={'linestyle': ''}):
        """
        Overrides default plot options for datapoint plot
        @param plot_kwargs keyword arguments for the plot function (None disables the line plot)
        """
        # The argument is copied, so neither the caller's dict nor the default is ever mutated.
        self.plot_kwargs = copy.copy(plot_kwargs)
        return self

    def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
        """
        Overrides default errorbar options for datapoint errorbars
        @param errorbar_kwargs keyword arguments for the errorbar function (None disables errorbars)
        """
        self.errorbar_kwargs = copy.copy(errorbar_kwargs)
        return self

    def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
        """
        Overrides default errorband options for datapoint errorband
        @param errorband_kwargs keyword arguments for the fill_between function (None disables the band)
        """
        self.errorband_kwargs = copy.copy(errorband_kwargs)
        return self

    def set_fill_options(self, fill_kwargs=None):
        """
        Overrides default fill_between options for datapoint errorband
        @param fill_kwargs keyword arguments for the fill_between function (None disables the fill)
        """
        self.fill_kwargs = copy.copy(fill_kwargs)
        return self

    def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
        """
        Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
        @param axis axis the datapoints are drawn on
        @param x coordinates of the data points
        @param y coordinates of the data points
        @param xerr symmetric error on x data points (scalar or one value per point)
        @param yerr symmetric error on y data points (scalar or one value per point)
        @return tuple (legend handles, line artist, errorbar artist, errorband artist);
                artists which were not drawn are None
        """
        p = e = f = None
        # Work on copies: the options stored on the instance must survive this call unchanged.
        plot_kwargs = copy.copy(self.plot_kwargs)
        errorbar_kwargs = copy.copy(self.errorbar_kwargs)
        errorband_kwargs = copy.copy(self.errorband_kwargs)
        fill_kwargs = copy.copy(self.fill_kwargs)

        if plot_kwargs is not None and 'color' in plot_kwargs:
            color = plot_kwargs['color']
        else:
            # No explicit colour: take the next one from the axis colour cycle.
            color = next(axis._get_lines.prop_cycler)['color']
            # The line plot may be disabled (None); only then there is nothing to update.
            if plot_kwargs is not None:
                plot_kwargs['color'] = color
        color = matplotlib.colors.ColorConverter().to_rgb(color)

        # Proxy artist used as first legend handle; callers read its colour via get_color().
        patch = matplotlib.patches.Patch(color=color, alpha=0.7)
        patch.get_color = patch.get_facecolor
        patches = [patch]

        if plot_kwargs is not None:
            p, = axis.plot(x, y, **plot_kwargs)
            patches.append(p)

        if errorbar_kwargs is not None and (xerr is not None or yerr is not None):
            if 'color' not in errorbar_kwargs:
                errorbar_kwargs['color'] = color
            if 'ecolor' not in errorbar_kwargs:
                # Error lines are drawn in a darkened version of the marker colour.
                errorbar_kwargs['ecolor'] = [0.4 * channel for channel in color]
            # The line width of the error bars is fixed here and overrides the configured option.
            errorbar_kwargs['elinewidth'] = 5
            e = axis.errorbar(x, y, xerr=xerr, yerr=yerr, **errorbar_kwargs)
            patches.append(e)

        if errorband_kwargs is not None and yerr is not None:
            if 'color' not in errorband_kwargs:
                errorband_kwargs['color'] = color
            if xerr is not None:
                # Callers pass scalars such as xerr=0; expand both errors to one value per point.
                xerr = numpy.ones(len(x)) * xerr
                yerr = numpy.ones(len(y)) * yerr
                for _x, _y, _xe, _ye in zip(x, y, xerr, yerr):
                    axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye,
                                                                **errorband_kwargs))
            else:
                f = axis.fill_between(x, y - yerr, y + yerr, interpolate=True, **errorband_kwargs)

        if fill_kwargs is not None:
            axis.fill_between(x, y, 0, **fill_kwargs)

        return (tuple(patches), p, e, f)

    def add(self, *args, **kwargs):
        """
        Add a new plot to this plotter
        """
        return NotImplemented

    def finish(self, *args, **kwargs):
        """
        Finish plotting and set labels, legends and stuff
        """
        return NotImplemented

    def scale_limits(self):
        """
        Scale limits to increase distance to boundaries
        """
        # copysign keeps the direction right for negative limits: the lower
        # limit always moves down and the upper limit always moves up.
        self.ymin *= 1.0 - math.copysign(self.yscale, self.ymin)
        self.ymax *= 1.0 + math.copysign(self.yscale, self.ymax)
        self.xmin *= 1.0 - math.copysign(self.xscale, self.xmin)
        self.xmax *= 1.0 + math.copysign(self.xscale, self.xmax)
        return self
class PurityAndEfficiencyOverCut(Plotter):
    """
    Plots the purity and the efficiency over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed draw efficiency and purity if True, true and false positives otherwise
        @return self
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)

        # Each entry is (legend label, (values, errors)); the two curves are drawn in this order.
        if normed:
            curves = [("Efficiency", hists.get_efficiency(['Signal'])),
                      ("Purity", hists.get_purity(['Signal'], ['Background']))]
        else:
            curves = [("True positive", hists.get_true_positives(['Signal'])),
                      ("False positive", hists.get_false_positives(['Background']))]

        cuts = hists.bin_centers

        # Grow the axis ranges so that they contain every curve added so far; NaN bins are ignored.
        self.xmin, self.xmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        for _, (values, _errors) in curves:
            self.ymin = numpy.nanmin([numpy.nanmin(values), self.ymin])
            self.ymax = numpy.nanmax([numpy.nanmax(values), self.ymax])

        for curve_label, (values, errors) in curves:
            self.plots.append(self._plot_datapoints(self.axis, cuts, values, xerr=0, yerr=errors))
            self.labels.append(curve_label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Classification Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        # The first element of every stored plot is the tuple of legend handles.
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class SignalToNoiseOverCut(Plotter):
    """
    Plots the signal to noise ratio over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate signal to noise ratio for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed accepted for signature compatibility with the other cut plotters, not used here
        @return self
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])
        cuts = hists.bin_centers

        # Grow the axis ranges so that they contain every curve added so far; NaN bins are ignored.
        self.xmin, self.xmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(signal2noise), self.ymin]), \
            numpy.nanmax([numpy.nanmax(signal2noise), self.ymax])

        self.plots.append(self._plot_datapoints(self.axis, cuts, signal2noise, xerr=0, yerr=signal2noise_error))
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Signal to Noise Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        # The first element of every stored plot is the tuple of legend handles.
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class PurityOverEfficiency(Plotter):
    """
    Plots the purity over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label label for the legend, if None the column name is used
        @return self
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        purity, purity_error = hists.get_purity(['Signal'], ['Background'])

        # Grow the axis ranges so that they contain every curve added so far.
        self.xmin, self.xmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(purity), self.ymin]), numpy.nanmax([numpy.nanmax(purity), self.ymax])

        p = self._plot_datapoints(self.axis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
        # The curve has to be stored: finish() builds the legend from self.plots and
        # expects exactly one label per stored plot.
        self.plots.append(p)
        self.labels.append(label if label is not None else column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("ROC Purity Plot")
        self.axis.get_xaxis().set_label_text('Efficiency')
        self.axis.get_yaxis().set_label_text('Purity')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class RejectionOverEfficiency(Plotter):
    """
    Plots the rejection over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label prefix for the legend entry (truncated to 10 characters); the AUC is always shown
        @return self
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        # Rejection is the complement of the background efficiency; the error is unchanged by that.
        rejection, rejection_error = hists.get_efficiency(['Background'])
        rejection = 1 - rejection

        # Grow the axis ranges so that they contain every curve added so far.
        self.xmin, self.xmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([rejection.min(), self.ymin]), numpy.nanmax([rejection.max(), self.ymax])

        # numpy.trapz is deprecated since NumPy 2.0 in favour of numpy.trapezoid; use whichever exists.
        integrate = getattr(numpy, 'trapezoid', None) or numpy.trapz
        # abs() because the efficiency may be ordered in descending direction.
        auc = numpy.abs(integrate(rejection, efficiency))

        p = self._plot_datapoints(self.axis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
        # The curve has to be stored: finish() builds the legend from self.plots and
        # expects exactly one label per stored plot.
        self.plots.append(p)
        if label is not None:
            self.labels.append(label[:10] + r"$\ {\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        else:
            self.labels.append(r"${\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))

        x_axis = self.axis.get_xaxis()
        y_axis = self.axis.get_yaxis()
        x_axis.set_tick_params(labelsize=60)
        y_axis.set_tick_params(labelsize=60)

        x_axis.labelpad = 20
        y_axis.labelpad = 20
        x_axis.set_label_text(r'${\rm Signal\ Efficiency}$', fontsize=65)
        y_axis.set_label_text(r'${\rm Background\ Rejection}$', fontsize=65)
        # loc=3 places the legend in the lower left corner.
        self.axis.legend([x[0] for x in self.plots], self.labels, fancybox=True, framealpha=0.5, fontsize=60, loc=3)
        return self
class Multiplot(Plotter):
    """
    Plots multiple other plots into a grid 3x?
    """

    def __init__(self, cls, number_of_plots, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param cls plotter class created for every subplot, called as cls(figure, axis)
        @param number_of_plots number of subplots, at least 1
        @param figure default draw figure which is used
        """
        if number_of_plots < 1:
            raise ValueError("Multiplot needs at least one plot, got {}".format(number_of_plots))

        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Up to three plots share a single row; more plots fill rows of three columns.
        if number_of_plots <= 3:
            gs = matplotlib.gridspec.GridSpec(1, number_of_plots)
        else:
            gs = matplotlib.gridspec.GridSpec(int(numpy.ceil(number_of_plots / 3)), 3)

        #: the plotters of the individual subplots, in row-major order
        self.sub_plots = [cls(self.figure, self.figure.add_subplot(gs[i // 3, i % 3]))
                          for i in range(number_of_plots)]
        # The first subplot acts as the main axis of this plotter.
        self.axis = self.sub_plots[0].axis
        super(Multiplot, self).__init__(self.figure, self.axis)

    def add(self, i, *args, **kwargs):
        """
        Call add function of ith subplot
        @param i position of the subplot
        @return self
        """
        self.sub_plots[i].add(*args, **kwargs)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        for plot in self.sub_plots:
            plot.finish()
        return self
class Diagonal(Plotter):
    """
    Plots the purity in each bin over the classifier output.
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None):
        """
        Add a new curve to the Diagonal plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @return self
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])

        centers = hists.bin_centers
        self.xmin, self.xmax = min(centers.min(), self.xmin), max(centers.max(), self.xmax)
        # The y range is fixed to the unit interval, independent of the data.
        self.ymin, self.ymax = 0, 1

        p = self._plot_datapoints(self.axis, centers, purity, xerr=hists.bin_widths / 2.0, yerr=purity_error)
        # The curve has to be stored: finish() builds the legend from self.plots.
        self.plots.append(p)
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        # Reference line y = x across the unit square.
        self.axis.plot((0.0, 1.0), (0.0, 1.0), color='black')
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Diagonal Plot")
        self.axis.get_xaxis().set_label_text('Classifier Output')
        self.axis.get_yaxis().set_label_text('Purity Per Bin')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class Distribution(Plotter):
    """
    Plots distribution of a quantity
    """

    def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
                 keep_first_binning=False, range_in_std=None, logScale=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed_to_all_entries divide the histogram by its total number of entries before drawing
        @param normed_to_bin_width divide the histogram by the bin widths before drawing
        @param keep_first_binning use the binning of the first distribution for further plots
        @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
        @param logScale draw the y axis in logarithmic scale
        """
        super(Distribution, self).__init__(figure, axis)
        self.normed_to_all_entries = normed_to_all_entries
        self.normed_to_bin_width = normed_to_bin_width
        self.range_in_std = range_in_std
        # Start from an empty range (ymin keeps the inherited 0) so that the
        # first added histogram defines the limits.
        self.ymax = float('-inf')
        self.xmin = float('inf')
        self.xmax = float('-inf')
        self.keep_first_binning = keep_first_binning
        #: binning of the first added distribution, only filled if keep_first_binning is set
        self.first_binning = None
        self.x_axis_label = ''
        # Honour the constructor argument; it can still be changed on the instance afterwards.
        self.logScale = logScale
        #: width of one bin of the most recently added histogram, None until add was called
        self.binWidth = None

    def add(self, data, column, mask=None, weight_column=None, label=None, bins=50):
        """
        Add a new distribution to the plots
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param mask boolean numpy.array defining which events are used for the histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend, if None the column name is used
        @param bins binning passed on to histogram.Histograms
        @return self
        """
        if mask is None:
            mask = numpy.ones(len(data)).astype('bool')

        if self.keep_first_binning and self.first_binning is not None:
            bins = self.first_binning
        hists = histogram.Histograms(data, column, {'Total': mask}, weight_column=weight_column,
                                     bins=bins, equal_frequency=False, range_in_std=self.range_in_std)
        if self.keep_first_binning and self.first_binning is None:
            self.first_binning = hists.bins
        hist, hist_error = hists.get_hist('Total')

        widths = hists.bin_widths
        # The second bin is used as representative width; fall back to the only bin if there is just one.
        self.binWidth = widths[1] if len(widths) > 1 else widths[0]

        if self.normed_to_all_entries:
            normalization = float(numpy.sum(hist))
            # An empty histogram stays all-zero instead of turning into NaNs.
            if normalization != 0:
                hist = hist / normalization
                hist_error = hist_error / normalization

        if self.normed_to_bin_width:
            hist = hist / widths
            hist_error = hist_error / widths

        centers = hists.bin_centers
        self.xmin, self.xmax = min(centers.min(), self.xmin), max(centers.max(), self.xmax)
        self.ymin, self.ymax = numpy.nanmin([hist.min(), self.ymin]), numpy.nanmax([(hist + hist_error).max(), self.ymax])

        p = self._plot_datapoints(self.axis, centers, hist, xerr=widths / 2, yerr=hist_error)
        # Stored so that callers can query the artists (e.g. their colour) afterwards.
        self.plots.append(p)
        self.x_axis_label = column
        self.labels.append(column if label is None else label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))

        if self.logScale:
            # 'nonposy' was renamed to 'nonpositive' in matplotlib 3.3 and removed
            # later on; older releases reject the new name instead.
            try:
                self.axis.set_yscale('log', nonpositive='clip')
            except (TypeError, ValueError):
                self.axis.set_yscale('log', nonposy='clip')
        else:
            self.axis.set_ylim((self.ymin, self.ymax))

        # Format into a local so that self.binWidth stays numeric and finish can be called again.
        bin_width_text = '' if self.binWidth is None else '{:8.2f}'.format(self.binWidth)

        y_axis = self.axis.get_yaxis()
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        if self.normed_to_all_entries and self.normed_to_bin_width:
            y_axis.set_label_text(r'# Entries per Bin / (# Entries * Bin Width)')
        elif self.normed_to_all_entries:
            y_axis.set_label_text(
                r'{$\frac{\rm Entries\hspace{0.25em} per\hspace{0.25em} Bin}{\rm Entries}\, /\, (' +
                bin_width_text + r'\,)$}', fontsize=65)
            y_axis.labelpad = 20
            y_axis.set_tick_params(labelsize=60)
        elif self.normed_to_bin_width:
            y_axis.set_label_text(r'# Entries per Bin / Bin Width')
        else:
            y_axis.set_label_text(r'# Entries per Bin')
        return self
# NOTE(review): the class statement for these three methods is not part of the
# reviewed listing. The name `Box` and the `Plotter` base are inferred from the
# "Box Plot" title and the Plotter-style constructor below - confirm.
class Box(Plotter):
    """
    Create boxplot of a quantity
    """

    def __init__(self, figure=None, axis=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        """
        super().__init__(figure=figure, axis=axis)
        #: label of the x axis, set to the column of the most recently added boxplot
        self.x_axis_label = ""

    def add(self, data, column, mask=None, weight_column=None):
        """
        Add a new boxplot to the plots
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate boxplot quantities
        @param mask boolean numpy.array defining which events are used for the histogram
        @param weight_column column in data containing the weights for each event (currently ignored)
        @return self
        """
        if mask is None:
            mask = numpy.ones(len(data)).astype('bool')
        x = data[column][mask]
        if weight_column is not None:
            B2WARNING("Weights are currently not used in boxplot, due to limitations in matplotlib")

        # Nothing is drawn and nothing is recorded for an empty selection.
        if len(x) == 0:
            B2WARNING("Ignore empty boxplot.")
            return self

        p = self.axis.boxplot(x, sym='k.', whis=1.5, vert=False, patch_artist=True, showmeans=True, widths=1,
                              boxprops=dict(facecolor='blue', alpha=0.5))
        self.plots.append(p)
        self.labels.append(column)
        self.x_axis_label = column
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self
        """
        # The box is drawn horizontally, so the y axis carries no information.
        matplotlib.artist.setp(self.axis.get_yaxis(), visible=False)
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_title("Box Plot")
        return self
class Difference(Plotter):
    """
    Plots the difference between two histograms
    """

    def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed normalize minuend and subtrahend before comparing them
        @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
        """
        super(Difference, self).__init__(figure, axis)
        self.normed = normed
        self.shift_to_zero = shift_to_zero
        #: label of the x axis, set to the column of the most recently added difference
        self.x_axis_label = ''

    def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None, bins=50):
        """
        Add a new difference plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
        @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend if None, the column name is used
        @param bins binning passed on to histogram.Histograms
        @return self
        """
        hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask}, bins=bins,
                                     weight_column=weight_column, equal_frequency=False)
        minuend, minuend_error = hists.get_hist('Minuend')
        subtrahend, subtrahend_error = hists.get_hist('Subtrahend')

        # Errors of the two histograms are combined in quadrature.
        difference_error = numpy.sqrt(minuend_error**2 + subtrahend_error**2)
        if self.normed:
            difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
            minuend = minuend / numpy.sum(minuend)
            subtrahend = subtrahend / numpy.sum(subtrahend)
        difference = minuend - subtrahend

        if self.shift_to_zero:
            difference = difference - numpy.mean(difference)

        centers = hists.bin_centers
        self.xmin, self.xmax = min(centers.min(), self.xmin), max(centers.max(), self.xmax)
        self.ymin = min((difference - difference_error).min(), self.ymin)
        self.ymax = max((difference + difference_error).max(), self.ymax)

        p = self._plot_datapoints(self.axis, centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
        self.plots.append(p)
        self.labels.append(label if label is not None else column)
        self.x_axis_label = column
        return self

    def finish(self, line_color='black'):
        """
        Sets limits, title, axis-labels and legend of the plot
        @param line_color color of the horizontal reference line at zero
        @return self
        """
        # Horizontal reference line at a difference of zero.
        self.axis.plot((self.xmin, self.xmax), (0, 0), color=line_color, linewidth=4)
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Difference Plot")
        self.axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_ylabel(r'{\rm Difference}', fontsize=40, labelpad=20)
        self.axis.get_xaxis().grid(True)
        return self
class normalizedResiduals(Plotter):
    """
    Plots the difference between two histograms
    """

    def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed normalize minuend and subtrahend before comparing them
        @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
        """
        super(normalizedResiduals, self).__init__(figure, axis)
        self.normed = normed
        self.shift_to_zero = shift_to_zero
        #: label of the x axis, set to the column of the most recently added residuals
        self.x_axis_label = ''

    def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None, bins=50, isNN=False):
        """
        Add a new difference plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
        @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
        @param weight_column column in data containing the weights for each event
        @param label label for the legend if None, the column name is used
        @param bins binning passed on to histogram.Histograms
        @param isNN lower the x range to start at -1 (see the note in the body)
        @return self
        """
        hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask}, bins=bins,
                                     weight_column=weight_column, equal_frequency=False)
        minuend, minuend_error = hists.get_hist('Minuend')
        subtrahend, subtrahend_error = hists.get_hist('Subtrahend')

        # Diagnostic output goes through the basf2 logger instead of a bare print.
        B2INFO("Here BinWidths Norm " + str(hists.bin_widths))

        # Both histograms are normalised to unit sum; their relative errors are combined in quadrature.
        difference_error = numpy.sqrt((minuend_error / numpy.sum(minuend))**2 + (subtrahend_error / numpy.sum(subtrahend))**2)
        minuend = minuend / numpy.sum(minuend)
        subtrahend = subtrahend / numpy.sum(subtrahend)
        difference = minuend - subtrahend
        # Residuals in units of their uncertainty; bins without entries yield NaN or inf.
        normalizedRes = (minuend - subtrahend) / difference_error

        # NOTE(review): the shifted difference is not used for the plotted
        # residuals, so shift_to_zero currently has no visible effect - confirm intent.
        if self.shift_to_zero:
            difference = difference - numpy.mean(difference)

        # NOTE(review): the condition guarding this assignment is not visible in the
        # reviewed listing; isNN is the only otherwise unused parameter, so it is
        # assumed to select the lower x limit of -1 - confirm.
        if isNN:
            self.xmin = float(-1.0)

        # The residuals are drawn with a unit error bar.
        p = self._plot_datapoints(self.axis, hists.bin_centers, normalizedRes, xerr=hists.bin_widths / 2, yerr=1)
        self.plots.append(p)
        self.labels.append(label if label is not None else column)
        self.x_axis_label = column
        return self

    def finish(self, line_color='black'):
        """
        Sets limits, title, axis-labels and legend of the plot
        @param line_color accepted for signature compatibility with Difference.finish, not used here
        @return self
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((-5, 5))
        self.axis.set_title("Difference Plot")
        y_axis = self.axis.get_yaxis()
        y_axis.set_major_locator(matplotlib.ticker.MaxNLocator(5))
        self.axis.get_xaxis().set_label_text(self.x_axis_label)
        self.axis.set_ylabel(r'${\rm Normalized}$' + '\n' + r'${\rm Residuals}$', fontsize=40, labelpad=20)

        # One tick per integer from -5 to 5, but only the even ticks inside the range are labelled.
        ticks = list(range(-5, 6))
        y_axis.set_ticks(ticks)
        y_axis.set_ticklabels([r'${}$'.format(tick) if tick % 2 == 0 else r'' for tick in ticks], fontsize=45)
        self.axis.get_xaxis().grid(True)

        # Horizontal guide lines at +-3 (green) and +-1 (blue).
        for level, line_colour in ((3, '#006600'), (1, 'b'), (-1, 'b'), (-3, '#006600')):
            self.axis.plot((self.xmin, self.xmax), (level, level), linewidth=4, color=line_colour, linestyle='-')
        return self
924 class Overtraining(Plotter):
926 Create TMVA-like overtraining control plot for a classification training
940 Creates a new figure if None is given, sets the default plot parameters
941 @param figure default draw figure which is used
944 self.figure = matplotlib.figure.Figure(figsize=(32, 18))
945 self.figure.set_tight_layout(
True)
949 gs = matplotlib.gridspec.GridSpec(5, 1)
950 self.axis = self.figure.add_subplot(gs[:3, :])
951 self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
952 self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)
954 super(Overtraining, self).
__init__(self.figure, self.axis)
956 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None, bkgrOutput=0, isNN=False):
958 Add a new overtraining plot, I recommend to raw only one overtraining plot at the time,
959 otherwise there are too many curves in the plot to reconize anything in the plot.
960 @param data pandas.DataFrame containing all data
961 @param column which is used to calculate distribution histogram
962 @param train_mask boolean numpy.array defining which events are training events
963 @param test_mask boolean numpy.array defining which events are test events
964 @param signal_mask boolean numpy.array defining which events are signal events
965 @param bckgrd_mask boolean numpy.array defining which events are background events
966 @param weight_column column in data containing the weights for each event
968 distribution = Distribution(self.figure, self.axis, normed_to_all_entries=
True)
973 bins = list(range(-51, 55, 1))
974 for i
in range(0, len(bins)):
975 bins[i] = float(bins[i]) / 25
977 bins = list(range(-51, 55, 1))
978 for i
in range(0, len(bins)):
979 bins[i] = float(bins[i]) / 50
982 distribution.logScale =
True
983 distribution.labels = [
r'{\rm Test-Bkgr.}',
r'{\rm Train-Bkgr.}',
r'{\rm Test-Signal}',
r'{\rm Train-Signal}']
985 distribution.labels = [
986 r'{\rm Test-$\bar{B}^{0}$}',
987 r'{\rm Train-$\bar{B}^{0}$}',
988 r'{\rm Test-$B^{0}$}',
989 r'{\rm Train-$B^{0}$}']
991 distribution.set_plot_options(self.plot_kwargs)
993 distribution.set_errorbar_options({
'fmt':
'o',
'elinewidth': 5,
'alpha': 1,
'markersize': 20,
'ecolor':
'w'})
995 distribution.set_errorband_options(
None)
996 distribution.add(data, column, test_mask & bckgrd_mask, weight_column,
None, bins)
997 distribution.add(data, column, test_mask & signal_mask, weight_column,
None, bins)
999 distribution.set_errorbar_options(
None)
1001 distribution.set_plot_options({
'color': distribution.plots[0][0][0].get_color(
1002 ),
'drawstyle':
'steps-mid',
'linestyle':
'dashed',
'lw': 5})
1003 distribution.set_fill_options(
None)
1004 distribution.add(data, column, train_mask & bckgrd_mask, weight_column,
None, bins)
1005 distribution.set_plot_options({
'color': distribution.plots[1][0][0].get_color(
1006 ),
'drawstyle':
'steps-mid',
'linestyle':
'solid',
'lw': 5})
1008 distribution.add(data, column, train_mask & signal_mask, weight_column,
None, bins)
1010 distribution.finish()
1012 p1 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1014 fmt=
'o', mfc=distribution.plots[0][0][0].get_color(),
1015 mec=distribution.plots[0][0][0].get_color(), markersize=20, label=
r'${\rm Test-Bkgr.}$')
1016 p2, = distribution.axis.plot([], label=
r'${\rm Train-Bkgr.}$', linewidth=5,
1017 linestyle=
'dashed', c=distribution.plots[0][0][0].get_color())
1018 p3 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1020 fmt=
'o', mfc=distribution.plots[1][0][0].get_color(),
1021 mec=distribution.plots[1][0][0].get_color(), markersize=20, label=
r'${\rm Test-Signal}$')
1022 p4, = distribution.axis.plot([], label=
r'${\rm Train-Signal}$', linewidth=5,
1023 linestyle=
'solid', alpha=0.9, c=distribution.plots[1][0][0].get_color())
1025 distribution.axis.legend([p1, p2, p3, p4], distribution.labels, loc=
'best', fancybox=
True, framealpha=0.5, fontsize=60)
1027 self.plot_kwargs[
'color'] = distribution.plots[0][0][0].get_color()
1028 difference_bckgrd = normalizedResiduals(self.figure, self.axis_d1, shift_to_zero=
True, normed=
True)
1029 difference_bckgrd.set_plot_options(self.plot_kwargs)
1030 difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
1031 difference_bckgrd.set_errorband_options(self.errorband_kwargs)
1032 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column,
None, bins, isNN)
1033 self.axis_d1.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
1034 self.axis_d1.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
1036 difference_bckgrd.plots = difference_bckgrd.labels = []
1037 difference_bckgrd.finish(line_color=distribution.plots[0][0][0].get_color())
1039 self.plot_kwargs[
'color'] = distribution.plots[1][0][0].get_color()
1040 difference_signal = normalizedResiduals(self.figure, self.axis_d2, shift_to_zero=
True, normed=
True)
1041 difference_signal.set_plot_options(self.plot_kwargs)
1042 difference_signal.set_errorbar_options(self.errorbar_kwargs)
1043 difference_signal.set_errorband_options(self.errorband_kwargs)
1044 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column,
None, bins, isNN)
1045 self.axis_d2.set_xlim((difference_signal.xmin, difference_signal.xmax))
1046 self.axis_d2.set_ylim((difference_signal.ymin, difference_signal.ymax))
1047 difference_signal.plots = difference_signal.labels = []
1048 difference_signal.finish(line_color=distribution.plots[1][0][0].get_color())
def finish(self, xLabel=r'${\rm Classifier\ Output}$'):
    """
    Finalises the axes of the plot: clears the titles of axis_d1 and axis_d2,
    hides the x tick labels and x axis labels of axis and axis_d1 and labels
    the x axis of axis_d2.
    @param xLabel label text which is drawn on the x axis of axis_d2
    @return self
    """
    labelled_xaxis = self.axis_d2.get_xaxis()

    for untitled_axis in (self.axis_d1, self.axis_d2):
        untitled_axis.set_title("")

    # Only axis_d2 keeps its x tick labels and its x axis label.
    for unlabelled_axis in (self.axis, self.axis_d1):
        matplotlib.artist.setp(unlabelled_axis.get_xticklabels(), visible=False)
    for unlabelled_axis in (self.axis, self.axis_d1):
        unlabelled_axis.get_xaxis().set_label_text('')

    labelled_xaxis.set_label_text(xLabel, fontsize=85)
    labelled_xaxis.labelpad = 20
    labelled_xaxis.set_tick_params(labelsize=60)
    return self
class VerboseDistribution(Plotter):
    """
    Plots distribution of a quantity including boxplots
    """

    def __init__(self, figure=None, axis=None, normed=False, range_in_std=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed true if the histograms should be normed before drawing
        @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
        """
        super(VerboseDistribution, self).__init__(figure, axis)
        #: Normalize histograms before drawing them
        self.normed = normed
        #: Width of the shown window in units of the standard deviation, None shows everything
        self.range_in_std = range_in_std
        #: Axes of the box plots, one per call of add
        self.box_axes = []
        #: Distribution plotter which draws into the main axis
        self.distribution = Distribution(self.figure, self.axis, normed_to_all_entries=self.normed,
                                         range_in_std=self.range_in_std)

    def add(self, data, column, mask=None, weight_column=None, label=None):
        """
        Add a new distribution plot, with additional information like a boxplot compared to
        the ordinary Distribution plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param mask boolean numpy.array defining which events are used for the distribution histogram
        @param weight_column column in data containing the weights for each event
        @param label label which is passed on to the distribution plot
        @return self
        """
        self.distribution.set_plot_options(self.plot_kwargs)
        self.distribution.set_errorbar_options(self.errorbar_kwargs)
        self.distribution.set_errorband_options(self.errorband_kwargs)
        self.distribution.add(data, column, mask, weight_column, label=label)

        # The distribution keeps the upper three quarters of the figure, the
        # lower quarter is divided evenly between all box plots.
        n = len(self.box_axes) + 1
        gs = matplotlib.gridspec.GridSpec(4 * n, 1)
        gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i in range(n)]
        box_axis = self.add_subplot(gridspecs)

        if self.range_in_std is not None:
            # NOTE(review): the helper computing mean and std is assumed to be
            # histogram.weighted_mean_and_std -- confirm against the histogram module.
            weights = None if weight_column is None else data[weight_column]
            mean, std = histogram.weighted_mean_and_std(data[column], weights)
            in_window = (data[column] > (mean - self.range_in_std * std)) & \
                        (data[column] < (mean + self.range_in_std * std))
            # mask defaults to None, which cannot be combined with '&'.
            mask = in_window if mask is None else mask & in_window

        box = Box(self.figure, box_axis)
        box.add(data, column, mask, weight_column)
        if len(box.plots) > 0:
            # Give the box the colour of the distribution which was just added.
            box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())

        self.box_axes.append(box_axis)
        return self

    def finish(self):
        """
        Finishes the distribution plot, hides the x tick labels and x axis labels
        of every axis except the last box axis and draws title and legend.
        @return self
        """
        self.distribution.finish()
        matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
        self.axis.get_xaxis().set_label_text('')
        for box_axis in self.box_axes[:-1]:
            matplotlib.artist.setp(box_axis.get_xticklabels(), visible=False)
            box_axis.set_title("")
            box_axis.get_xaxis().set_label_text('')
        # There is no box axis at all if add was never called.
        if self.box_axes:
            self.box_axes[-1].set_title("")
        self.axis.set_title("Distribution Plot")
        self.axis.legend([x[0] for x in self.distribution.plots], self.distribution.labels,
                         loc='best', fancybox=True, framealpha=0.5)
        return self
class Correlation(Plotter):
    """
    Plots change of a distribution of a quantity depending on the cut on a classifier
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Three rows: axis, axis_d1 and axis_d2, the lower two share the x axis of the first.
        gs = matplotlib.gridspec.GridSpec(3, 2)
        self.axis = self.figure.add_subplot(gs[0, :])
        self.axis_d1 = self.figure.add_subplot(gs[1, :], sharex=self.axis)
        self.axis_d2 = self.figure.add_subplot(gs[2, :], sharex=self.axis)

        super(Correlation, self).__init__(self.figure, self.axis)

    def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param cut_column which is used to calculate cut on the other quantity defined by column
        @param quantiles list of quantiles between 0 and 100, defining the different cuts.
               NOTE(review): this argument is not read, the cuts are always the
               percentiles 5, 10, ..., 95 -- confirm whether that is intended.
        @param signal_mask boolean mask of the events drawn into axis_d1
        @param bckgrd_mask boolean mask of the events drawn into axis_d2
        @param weight_column column in data containing the weights for each event
        @return self
        """
        if len(data[cut_column]) == 0:
            B2WARNING("Ignore empty Correlation.")
            return self

        axes = [self.axis, self.axis_d1, self.axis_d2]
        colormap = plt.get_cmap('coolwarm')
        selections = [('.', signal_mask | bckgrd_mask), ('S', signal_mask), ('B', bckgrd_mask)]

        for i, (label, m) in enumerate(selections):

            if weight_column is not None:
                weights = numpy.array(data[weight_column][m])
            else:
                weights = numpy.ones(len(data[column][m]))

            # Only the central 90% of the selected events are histogrammed.
            x_range = np.percentile(data[column][m], [5, 95]).astype(np.float32)

            # 'density' replaces the 'normed' keyword which newer NumPy releases
            # no longer accept; both agree for the equal-width bins used here.
            tmp, x = np.histogram(data[column][m], bins=100,
                                  range=x_range, density=True, weights=weights)
            bin_center = (x[:-1] + x[1:]) / 2
            axes[i].plot(bin_center, tmp, color='black', lw=1)

            for quantil in np.arange(5, 100, 5):
                cut = np.percentile(data[cut_column][m], quantil)
                sel = data[cut_column][m] >= cut
                y, x = np.histogram(data[column][m][sel], bins=100,
                                    range=x_range, density=True, weights=weights[sel])
                bin_center = (x[:-1] + x[1:]) / 2
                # Colour the band between the previous and the current histogram.
                axes[i].fill_between(bin_center, tmp, y, color=colormap(quantil / 100.0))
                tmp = y

            axes[i].set_ylim(bottom=0)

            # NOTE(review): the flatness helper is assumed to be
            # basf2_mva_util.calculate_flatness -- confirm name and import.
            flatness_score = basf2_mva_util.calculate_flatness(data[column][m], data[cut_column][m], weights)
            axes[i].set_title(
                r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(label, flatness_score))
        return self

    def finish(self):
        """
        Nothing is left to do for this plot, everything is drawn in add.
        @return self
        """
        return self
class TSNE(Plotter):
    """
    Plots multivariate distribution using TSNE algorithm
    """

    def add(self, data, columns, *masks):
        """
        Add a new TSNE plot. All events are embedded into two dimensions once,
        afterwards the events selected by each mask are drawn as one scatter plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the embedding
        @param masks boolean masks with one entry per row of data, one scatter plot is drawn per mask
        @return self
        """
        try:
            import sklearn.manifold
        except ImportError:
            print("Cannot create TSNE plot. Install sklearn if you want it")
            return self

        model = sklearn.manifold.TSNE(n_components=2, random_state=0)
        features = numpy.array([data[column] for column in columns]).T
        # TSNE offers no transform for a subset of events, therefore the whole
        # sample is embedded once and the masks only select rows of the result.
        embedded = model.fit_transform(features)
        for mask in masks:
            selected = numpy.asarray(mask, dtype=bool)
            self.axis.scatter(embedded[selected, 0], embedded[selected, 1])
        return self

    def finish(self):
        """
        Nothing is left to do for this plot, everything is drawn in add.
        @return self
        """
        return self
class Importance(Plotter):
    """
    Plots importance matrix
    """

    def add(self, data, columns, variables, displayHeatMap):
        """
        Add a new importance plot.
        @param data pandas.DataFrame containing all data
        @param columns of data holding the importances, every column is scaled to the range 0 to 100
        @param variables names which are used as labels, one per row of data
        @param displayHeatMap true if the colour bar should be drawn
        @return self
        """
        self.figure.set_tight_layout(True)

        def norm(x):
            # Scale x linearly to the range [0, 100], a constant column becomes all zeros.
            lowest = numpy.min(x)
            width = (numpy.max(x) - lowest)
            if width == 0:
                return numpy.zeros(x.shape)
            return (x - lowest) / width * 100

        importance_matrix = numpy.vstack([norm(data[column]) for column in columns]).T
        n_rows, n_columns = importance_matrix.shape

        new_RdBu = truncate_colormap(plt.get_cmap('RdBu'), 0.5, 0.85)

        # [value, label] pairs of every cell in ascending order; numpy stores
        # both entries of a pair as strings.
        labels = list(variables)
        cells = [[importance_matrix[row, col], labels[row]]
                 for row in range(n_rows)
                 for col in range(n_columns)]
        labelsValues = np.array(sorted(cells))

        sorted_matrix = np.array(np.sort(importance_matrix, axis=0))
        importance_heatmap = self.axis.pcolor(sorted_matrix, cmap=new_RdBu, vmin=0, vmax=100)

        # Major ticks sit in the middle of each cell.
        self.axis.set_yticks(numpy.arange(n_rows) + 0.5, minor=False)
        self.axis.set_xticks(numpy.arange(n_columns) + 0.5, minor=False)

        self.axis.set_xticklabels(columns, minor=False, rotation=90)

        # Few entries leave enough room for larger labels.
        if labelsValues.shape[0] < 6:
            self.axis.set_yticklabels(labelsValues[:, 1], minor=False, size=58)
        else:
            self.axis.set_yticklabels(labelsValues[:, 1], minor=False)

        self.axis.set_xticklabels([''])

        # The values are written into the last column of the heat map.
        last_column = n_columns - 1
        for entry in range(labelsValues.shape[0]):
            self.axis.text(last_column + 0.5, entry + 0.5, r'$%.0f$' % float(labelsValues[entry][0]),
                           horizontalalignment='center',
                           verticalalignment='center')

        if displayHeatMap:
            cb = self.figure.colorbar(importance_heatmap, ticks=[2, 98], orientation='vertical')
            cb.ax.tick_params(length=0)
            cb.ax.set_yticklabels([r'${\rm low}$', r'${\rm high}$'], size=60)

        self.axis.set_aspect('equal')

        return self

    def finish(self):
        """
        Nothing is left to do for this plot, everything is drawn in add.
        @return self
        """
        return self
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """
    Creates a new colormap from the part of cmap between minval and maxval
    @param cmap colormap which is sampled
    @param minval lowest position in cmap which is sampled
    @param maxval highest position in cmap which is sampled
    @param n number of equally spaced samples taken from cmap
    @return the new matplotlib.colors.LinearSegmentedColormap
    """
    truncated_name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    return matplotlib.colors.LinearSegmentedColormap.from_list(truncated_name, sampled_colors)
class CorrelationMatrix(Plotter):
    """
    Plots correlation matrix
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(38, 24))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Two matrices side by side sharing the y axis, the last grid row holds the colour bar.
        gs = matplotlib.gridspec.GridSpec(16, 2)
        self.signal_axis = self.figure.add_subplot(gs[:14, 0])
        self.bckgrd_axis = self.figure.add_subplot(gs[:14, 1], sharey=self.signal_axis)
        self.colorbar_axis = self.figure.add_subplot(gs[15, :])
        self.axis = self.signal_axis

        super(CorrelationMatrix, self).__init__(self.figure, self.axis)

    @staticmethod
    def _clear_negative_zeros(matrix):
        """
        Sets every entry of matrix which would be printed as '-0' by '%.0f' to 0, in place
        """
        matrix[numpy.signbit(matrix) & (numpy.round(matrix) == 0)] = 0

    @staticmethod
    def _annotate_cells(axis, matrix):
        """
        Writes the rounded value of every entry of matrix into the centre of its cell on axis
        """
        large_matrix = matrix.shape[0] > 24
        for y in range(matrix.shape[0]):
            for x in range(matrix.shape[1]):
                value = matrix[y, x]
                if large_matrix and value < 0:
                    # For large matrices the minus sign is written outside of math mode.
                    text = '-' + r'$%.0f$' % abs(value)
                else:
                    text = r'$%.0f$' % value
                axis.text(x + 0.5, y + 0.5, text,
                          horizontalalignment='center',
                          verticalalignment='center')

    def add(self, data, columns, signal_mask, bckgrd_mask, bkgrOutput):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param signal_mask boolean mask of the events used for the matrix on signal_axis
        @param bckgrd_mask boolean mask of the events used for the matrix on bckgrd_axis
        @param bkgrOutput if -1 the matrices are captioned with the B0 and anti-B0 labels,
               otherwise with Signal and Background
        @return self
        """
        signal_corr = numpy.corrcoef(numpy.vstack([data[column][signal_mask] for column in columns])) * 100
        bckgrd_corr = numpy.corrcoef(numpy.vstack([data[column][bckgrd_mask] for column in columns])) * 100

        # Mirror the columns; the x tick labels below are reversed accordingly.
        mirrored_signal_corr = signal_corr[:, ::-1].copy()
        mirrored_bckgrd_corr = bckgrd_corr[:, ::-1].copy()

        new_RdBu = truncate_colormap(plt.get_cmap('RdBu'), 0.15, 0.85)
        signal_heatmap = self.signal_axis.pcolor(mirrored_signal_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)
        self.bckgrd_axis.pcolor(mirrored_bckgrd_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)

        self._clear_negative_zeros(mirrored_signal_corr)
        self._clear_negative_zeros(mirrored_bckgrd_corr)

        self.signal_axis.invert_yaxis()
        self.signal_axis.xaxis.tick_top()
        self.bckgrd_axis.invert_yaxis()
        self.bckgrd_axis.xaxis.tick_top()

        # Major ticks sit in the middle of each cell.
        self.signal_axis.set_xticks(numpy.arange(mirrored_signal_corr.shape[0]) + 0.5, minor=False)
        self.signal_axis.set_yticks(numpy.arange(mirrored_signal_corr.shape[1]) + 0.5, minor=False)
        self.bckgrd_axis.set_xticks(numpy.arange(mirrored_bckgrd_corr.shape[0]) + 0.5, minor=False)
        self.bckgrd_axis.set_yticks(numpy.arange(mirrored_bckgrd_corr.shape[1]) + 0.5, minor=False)

        # Small matrices leave enough room for larger labels.
        label_options = {'size': 58} if mirrored_signal_corr.shape[0] < 8 else {}
        reversed_columns = list(reversed(columns))
        for axis in (self.bckgrd_axis, self.signal_axis):
            axis.set_xticklabels(reversed_columns, minor=False, rotation=90, **label_options)
            axis.set_yticklabels(columns, minor=False, **label_options)

        # Every matrix is annotated on its own axis.
        self._annotate_cells(self.signal_axis, mirrored_signal_corr)
        self._annotate_cells(self.bckgrd_axis, mirrored_bckgrd_corr)

        cb = self.figure.colorbar(signal_heatmap, cax=self.colorbar_axis, ticks=[-92.3, 0, 92.5],
                                  orientation='horizontal')
        cb.ax.tick_params(length=0)
        cb.ax.set_xticklabels([r'${\rm negative}$', r'${\rm uncorrelated}$', r'${\rm positive}$'], fontsize=60)

        if bkgrOutput == -1:
            self.figure.text(0.30, 0.11, r'$B^0\,(q_{\rm MC} = +1)$', horizontalalignment='center', size=65)
            self.figure.text(0.74, 0.11, r'$\bar{B}^0\,(q_{\rm MC} = -1)$', horizontalalignment='center', size=65)
        else:
            self.figure.text(0.27, 0.115, r'${\rm Signal}$', horizontalalignment='center', size=65)
            self.figure.text(0.73, 0.115, r'${\rm Background}$', horizontalalignment='center', size=65)

        return self

    def finish(self):
        """
        Hides the y tick labels of bckgrd_axis, which shares its y axis with signal_axis
        @return self
        """
        matplotlib.artist.setp(self.bckgrd_axis.get_yticklabels(), visible=False)
        return self
if __name__ == '__main__':

    def get_data(N, columns):
        """
        Creates fake data for example plots
        @param N total number of events, one half is drawn around 0 and the other half around 1
        @param columns names of the columns, the last column holds the target (0 or 1)
        @return shuffled pandas.DataFrame
        """
        # Integer division: N is used as an array size below.
        N //= 2
        n = len(columns) - 1
        xs = numpy.random.normal(0, size=(N, n))
        xb = numpy.random.normal(1, size=(N, n))
        ys = numpy.zeros(N)
        yb = numpy.ones(N)
        data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
        return data.reindex(numpy.random.permutation(data.index))

    import seaborn
    seaborn.set(font_scale=3)
    seaborn.set_style('whitegrid')

    N = 100000
    data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
    # Positional assignment on the frame itself avoids chained assignment,
    # the slice bounds have to be integers.
    data['type'] = ''
    type_position = data.columns.get_loc('type')
    data.iloc[:N // 2, type_position] = 'Train'
    data.iloc[N // 2:, type_position] = 'Test'

    is_signal = data['isSignal'] == 1
    is_bckgrd = data['isSignal'] == 0
    is_train = data['type'] == 'Train'
    is_test = data['type'] == 'Test'

    p = Box()
    p.add(data, 'FastBDT')
    p.finish()
    p.save('box_plot.png')

    p = VerboseDistribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('verbose_distribution_plot.png')

    p = PurityOverEfficiency()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_purity_plot.png')

    p = RejectionOverEfficiency()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_rejection_plot.png')

    p = Diagonal()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('diagonal_plot.png')

    p = Distribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('distribution_plot.png')

    p = Difference()
    p.add(data, 'FastBDT', is_train, is_test)
    p.add(data, 'NeuroBayes', is_train, is_test)
    p.finish()
    p.save('difference_plot.png')

    p = Overtraining()
    p.add(data, 'FastBDT', is_train, is_test, is_signal, is_bckgrd)
    p.finish()
    p.save('overtraining_plot.png')

    # Correlation.add combines both masks, so both have to be given.
    p = Correlation()
    p.add(data, 'FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], is_signal, is_bckgrd)
    p.finish()
    p.save('correlation_plot.png')

    p = CorrelationMatrix()
    data['FastBDT2'] = data['FastBDT']**2
    data['NeuroBayes2'] = data['NeuroBayes']**2
    data['FastBDT3'] = data['FastBDT']**3
    data['NeuroBayes3'] = data['NeuroBayes']**3
    # The masks are required arguments; any bkgrOutput other than -1 selects
    # the Signal and Background captions.
    p.add(data, ['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'],
          is_signal, is_bckgrd, bkgrOutput=0)
    p.finish()
    p.save('correlation_matrix.png')