15 from basf2
import B2INFO, B2WARNING
16 import basf2_mva_evaluation.histogram
as histogram
17 import matplotlib.ticker
18 import matplotlib.patches
19 import matplotlib.colors
20 import matplotlib.gridspec
21 import matplotlib.figure
22 import matplotlib.artist
23 import matplotlib.pyplot
as plt
# Global matplotlib style for every plot made by this module:
# large fonts and LaTeX text rendering with amsmath available.
matplotlib.rcParams.update({'font.size': 40})
matplotlib.rcParams['text.usetex'] = True
# The preamble is given as one string: list values were deprecated in
# matplotlib 3.3 and are rejected by later releases.
matplotlib.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"
38 class Plotter(object):
40 Base class for all Plotters.
def __init__(self, figure=None, axis=None):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    """
    B2INFO("Create new figure for class " + str(type(self)))
    if figure is None:
        # No figure supplied: create a private one.
        self.figure = matplotlib.figure.Figure(figsize=(32, 18))
        self.figure.set_tight_layout(False)
    else:
        self.figure = figure

    if axis is None:
        # No axis supplied: use a single subplot filling the figure.
        self.axis = self.figure.add_subplot(1, 1, 1)
    else:
        self.axis = axis

    # Per-instance containers; subclasses append to these in add().
    self.plots = []
    self.labels = []

    # Axis limits, widened by the subclasses as data is added.
    self.xmin, self.xmax = float(0), float(1)
    self.ymin, self.ymax = float(0), float(1)

    # Keyword arguments for plot / errorbar / errorband / fill drawing,
    # populated with their defaults by the setters below.
    self.plot_kwargs = None
    self.errorbar_kwargs = None
    self.errorband_kwargs = None
    self.fill_kwargs = None

    self.set_plot_options()
    self.set_errorbar_options()
    self.set_errorband_options()
    self.set_fill_options()
def add_subplot(self, gridspecs):
    """
    Adds a new subplot to the figure, updates all other axes
    according to the given gridspec
    @param gridspecs gridspecs for all axes including the new one
    @return the newly created axis (it shares its x axis with self.axis)
    """
    # Re-position the already existing axes onto their new grid cells.
    for gs, ax in zip(gridspecs[:-1], self.figure.axes):
        ax.set_position(gs.get_position(self.figure))
        ax.set_subplotspec(gs)
    # The last gridspec belongs to the axis that is created here.
    return self.figure.add_subplot(gridspecs[-1], sharex=self.axis)
def save(self, filename):
    """
    Save the figure into a file
    @param filename of the file
    @return self, so that calls can be chained
    """
    B2INFO("Save figure for class " + str(type(self)))
    # Imported locally so the Agg backend is only loaded when a figure is written.
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    canvas = FigureCanvas(self.figure)
    canvas.print_figure(filename, dpi=50)
    return self
def set_plot_options(self, plot_kwargs={'linestyle': ''}):
    """
    Overrides default plot options for datapoint plot
    @param plot_kwargs keyword arguments for the plot function (None is stored as-is)
    @return self, so that calls can be chained
    """
    # Shallow copy: neither the caller's dict nor the shared default is aliased.
    self.plot_kwargs = copy.copy(plot_kwargs)
    return self
def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
    """
    Overrides default errorbar options for datapoint errorbars
    @param errorbar_kwargs keyword arguments for the errorbar function (None is stored as-is)
    @return self, so that calls can be chained
    """
    # Shallow copy: neither the caller's dict nor the shared default is aliased.
    self.errorbar_kwargs = copy.copy(errorbar_kwargs)
    return self
def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
    """
    Overrides default errorband options for datapoint errorband
    @param errorband_kwargs keyword arguments for the fill_between function (None is stored as-is)
    @return self, so that calls can be chained
    """
    # Shallow copy: neither the caller's dict nor the shared default is aliased.
    self.errorband_kwargs = copy.copy(errorband_kwargs)
    return self
def set_fill_options(self, fill_kwargs=None):
    """
    Overrides default fill_between options for the filled area below the datapoints
    @param fill_kwargs keyword arguments for the fill_between function (None is stored as-is)
    @return self, so that calls can be chained
    """
    # Shallow copy so that the caller's dict is not aliased.
    self.fill_kwargs = copy.copy(fill_kwargs)
    return self
def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
    """
    Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
    @param axis axis the data points are drawn on
    @param x coordinates of the data points
    @param y coordinates of the data points
    @param xerr symmetric error on x data points
    @param yerr symmetric error on y data points
    @return tuple (patches, p, e, f): patches is a tuple of the created artists, starting with a
            colour proxy patch; p, e and f are the results of plot, errorbar and fill_between,
            or None if the corresponding element was not drawn
    """
    p = e = f = None

    # Work on copies so colours filled in below never leak into the stored options.
    plot_kwargs = copy.copy(self.plot_kwargs)
    errorbar_kwargs = copy.copy(self.errorbar_kwargs)
    errorband_kwargs = copy.copy(self.errorband_kwargs)
    fill_kwargs = copy.copy(self.fill_kwargs)

    if plot_kwargs is not None and 'color' in plot_kwargs:
        color = plot_kwargs['color']
    else:
        # No explicit colour: take the next one from the colour cycle of the axis.
        color = next(axis._get_lines.prop_cycler)['color']
        # plot_kwargs may be None (plotting disabled); only record the colour if it is a dict.
        if plot_kwargs is not None:
            plot_kwargs['color'] = color
    color = matplotlib.colors.ColorConverter().to_rgb(color)
    # Proxy patch carrying the colour; get_color is aliased so it can be queried like a line.
    patch = matplotlib.patches.Patch(color=color, alpha=0.7)
    patch.get_color = patch.get_facecolor
    patches = [patch]

    if plot_kwargs is not None:
        p, = axis.plot(x, y, **plot_kwargs)
        patches.append(p)

    if errorbar_kwargs is not None and (xerr is not None or yerr is not None):
        if 'color' not in errorbar_kwargs:
            errorbar_kwargs['color'] = color
        if 'ecolor' not in errorbar_kwargs:
            # Error bars default to a darker shade of the marker colour.
            errorbar_kwargs['ecolor'] = [0.4 * channel for channel in color]
        errorbar_kwargs['elinewidth'] = 5
        e = axis.errorbar(x, y, xerr=xerr, yerr=yerr, **errorbar_kwargs)
        patches.append(e)

    if errorband_kwargs is not None and yerr is not None:
        if 'color' not in errorband_kwargs:
            errorband_kwargs['color'] = color
        if xerr is not None:
            # Callers pass scalar errors (e.g. xerr=0); broadcast them so they can be iterated.
            xerr_each = numpy.broadcast_to(xerr, numpy.shape(x))
            yerr_each = numpy.broadcast_to(yerr, numpy.shape(y))
            # One rectangle per point spanning +-xerr and +-yerr.
            for _x, _y, _xe, _ye in zip(x, y, xerr_each, yerr_each):
                axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye,
                                                            **errorband_kwargs))
        else:
            f = axis.fill_between(x, y - yerr, y + yerr, interpolate=True, **errorband_kwargs)
            patches.append(f)

    if fill_kwargs is not None:
        axis.fill_between(x, y, 0, **fill_kwargs)

    return (tuple(patches), p, e, f)
def add(self, *args, **kwargs):
    """
    Add a new plot to this plotter.
    Placeholder of the base class: accepts any arguments and returns NotImplemented.
    """
    return NotImplemented
def finish(self, *args, **kwargs):
    """
    Finish plotting and set labels, legends and stuff.
    Placeholder of the base class: accepts any arguments and returns NotImplemented.
    """
    return NotImplemented
def scale_limits(self):
    """
    Scale limits to increase distance to boundaries.
    Lower limits are multiplied by (1 - s) and upper limits by (1 + s), where s is
    yscale (or xscale) carrying the sign of the limit itself.
    """
    y_margin = self.yscale
    x_margin = self.xscale
    self.ymin = self.ymin * (1.0 - math.copysign(y_margin, self.ymin))
    self.ymax = self.ymax * (1.0 + math.copysign(y_margin, self.ymax))
    self.xmin = self.xmin * (1.0 - math.copysign(x_margin, self.xmin))
    self.xmax = self.xmax * (1.0 + math.copysign(x_margin, self.xmax))
class PurityAndEfficiencyOverCut(Plotter):
    """
    Plots the purity and the efficiency over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed if True the efficiency and purity are drawn, otherwise the values of
               get_true_positives and get_false_positives
        @return self, so that calls can be chained
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)

        if normed:
            efficiency, efficiency_error = hists.get_efficiency(['Signal'])
            purity, purity_error = hists.get_purity(['Signal'], ['Background'])
            first_label, second_label = "Efficiency", "Purity"
        else:
            efficiency, efficiency_error = hists.get_true_positives(['Signal'])
            purity, purity_error = hists.get_false_positives(['Background'])
            first_label, second_label = "True positive", "False positive"

        cuts = hists.bin_centers

        # Widen the stored limits so that they contain the new curves (NaN entries are ignored).
        self.xmin, self.xmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(efficiency), numpy.nanmin(purity), self.ymin]), \
            numpy.nanmax([numpy.nanmax(efficiency), numpy.nanmax(purity), self.ymax])

        self.plots.append(self._plot_datapoints(self.axis, cuts, efficiency, xerr=0, yerr=efficiency_error))
        self.labels.append(first_label)

        self.plots.append(self._plot_datapoints(self.axis, cuts, purity, xerr=0, yerr=purity_error))
        self.labels.append(second_label)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Classification Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class SignalToNoiseOverCut(Plotter):
    """
    Plots the signal to noise ratio over the cut value (for cut choosing)
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
        """
        Add a new curve to the plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate signal to noise ratio for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param normed accepted for signature compatibility, not used by this plot
        @return self, so that calls can be chained
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)

        signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])

        cuts = hists.bin_centers

        # Widen the stored limits so that they contain the new curve (NaN entries are ignored).
        self.xmin, self.xmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(signal2noise), self.ymin]), \
            numpy.nanmax([numpy.nanmax(signal2noise), self.ymax])

        self.plots.append(self._plot_datapoints(self.axis, cuts, signal2noise, xerr=0, yerr=signal2noise_error))
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Signal to Noise Plot")
        self.axis.get_xaxis().set_label_text('Cut Value')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class PurityOverEfficiency(Plotter):
    """
    Plots the purity over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label label for the legend if None, the column name is used
        @return self, so that calls can be chained
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        purity, purity_error = hists.get_purity(['Signal'], ['Background'])

        # Widen the stored limits so that they contain the new curve.
        self.xmin, self.xmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(purity), self.ymin]), numpy.nanmax([numpy.nanmax(purity), self.ymax])

        p = self._plot_datapoints(self.axis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
        # finish() builds the legend from self.plots, so the curve has to be recorded.
        self.plots.append(p)
        self.labels.append(label if label is not None else column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("ROC Purity Plot")
        self.axis.get_xaxis().set_label_text('Efficiency')
        self.axis.get_yaxis().set_label_text('Purity')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
class RejectionOverEfficiency(Plotter):
    """
    Plots the rejection over the efficiency also known as ROC curve
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
        """
        Add a new curve to the ROC plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate efficiency and purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @param label prefix for the legend entry (only its first 10 characters are used);
               the area under the curve is always appended
        @return self, so that calls can be chained
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        # Rejection is one minus the efficiency evaluated on the background sample.
        rejection, rejection_error = hists.get_efficiency(['Background'])
        rejection = 1 - rejection

        # Widen the stored limits so that they contain the new curve.
        self.xmin, self.xmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmax])
        self.ymin, self.ymax = numpy.nanmin([rejection.min(), self.ymin]), numpy.nanmax([rejection.max(), self.ymax])

        # Area under the curve by the trapezoidal rule; abs() makes it independent of the ordering.
        auc = numpy.abs(numpy.trapz(rejection, efficiency))

        p = self._plot_datapoints(self.axis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
        # finish() builds the legend from self.plots, so the curve has to be recorded.
        self.plots.append(p)
        if label is not None:
            self.labels.append(label[:10] + r"$\ {\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        else:
            self.labels.append(r"${\rm AUC}\ =\ $" + r"${:.2f}$".format(auc))
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))

        self.axis.get_xaxis().set_tick_params(labelsize=60)
        self.axis.get_yaxis().set_tick_params(labelsize=60)

        self.axis.get_xaxis().labelpad = 20
        self.axis.get_yaxis().labelpad = 20
        self.axis.get_xaxis().set_label_text(r'${\rm Signal\ Efficiency}$', fontsize=65)
        self.axis.get_yaxis().set_label_text(r'${\rm Background\ Rejection}$', fontsize=65)
        self.axis.legend([x[0] for x in self.plots], self.labels, fancybox=True, framealpha=0.5, fontsize=60, loc=3)
        return self
class Multiplot(Plotter):
    """
    Plots multiple other plots into a grid 3x?
    """

    def __init__(self, cls, number_of_plots, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param cls plotter class, instantiated as cls(figure, axis) once per grid cell
        @param number_of_plots number of sub plots which are created
        @param figure default draw figure which is used
        """
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Up to three plots share a single row; more plots wrap into rows of three.
        if number_of_plots == 1:
            gs = matplotlib.gridspec.GridSpec(1, 1)
        elif number_of_plots == 2:
            gs = matplotlib.gridspec.GridSpec(1, 2)
        elif number_of_plots == 3:
            gs = matplotlib.gridspec.GridSpec(1, 3)
        else:
            gs = matplotlib.gridspec.GridSpec(int(numpy.ceil(number_of_plots / 3)), 3)

        self.sub_plots = [cls(self.figure, self.figure.add_subplot(gs[i // 3, i % 3]))
                          for i in range(number_of_plots)]
        # The first sub plot provides the main axis handed to the base class.
        self.axis = self.sub_plots[0].axis
        super(Multiplot, self).__init__(self.figure, self.axis)

    def add(self, i, *args, **kwargs):
        """
        Call add function of ith subplot
        @param i position of the subplot
        @return self, so that calls can be chained
        """
        self.sub_plots[i].add(*args, **kwargs)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        by finishing every sub plot
        @return self, so that calls can be chained
        """
        for plot in self.sub_plots:
            plot.finish()
        return self
class Diagonal(Plotter):
    """
    Plots the purity in each bin over the classifier output.
    """

    def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None):
        """
        Add a new curve to the Diagonal plot
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate purity for different cuts
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @return self, so that calls can be chained
        """
        hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                     weight_column=weight_column)
        purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])

        self.xmin, self.xmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmax)
        # The y range is fixed to the unit interval.
        self.ymin, self.ymax = 0, 1

        p = self._plot_datapoints(self.axis, hists.bin_centers, purity, xerr=hists.bin_widths / 2.0, yerr=purity_error)
        # finish() builds the legend from self.plots, so the curve has to be recorded.
        self.plots.append(p)
        self.labels.append(column)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        @return self, so that calls can be chained
        """
        # Reference diagonal from (0, 0) to (1, 1).
        self.axis.plot((0.0, 1.0), (0.0, 1.0), color='black')
        self.axis.set_xlim((self.xmin, self.xmax))
        self.axis.set_ylim((self.ymin, self.ymax))
        self.axis.set_title("Diagonal Plot")
        self.axis.get_xaxis().set_label_text('Classifier Output')
        self.axis.get_yaxis().set_label_text('Purity Per Bin')
        self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
        return self
545 class Distribution(Plotter):
547 Plots distribution of a quantity
def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
             keep_first_binning=False, range_in_std=None, logScale=False):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed_to_all_entries true if histograms should be divided by their total content before drawing
    @param normed_to_bin_width true if histograms should be divided by the bin widths before drawing
    @param keep_first_binning use the binning of the first distribution for further plots
    @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
    @param logScale initial value of the logScale attribute
    """
    super(Distribution, self).__init__(figure, axis)
    self.normed_to_all_entries = normed_to_all_entries
    self.normed_to_bin_width = normed_to_bin_width
    self.range_in_std = range_in_std

    # Start from an empty range so that the first added histogram defines these limits.
    self.ymax = float('-inf')
    self.xmin = float('inf')
    self.xmax = float('-inf')

    self.keep_first_binning = keep_first_binning
    # Binning of the first histogram, remembered when keep_first_binning is set.
    self.first_binning = None
    self.x_axis_label = ''
    # Honour the constructor argument (the default keeps the previous value False).
    self.logScale = logScale
587 def add(self, data, column, mask=None, weight_column=None, label=None, bins=50):
589 Add a new distribution to the plots
590 @param data pandas.DataFrame containing all data
591 @param column which is used to calculate distribution histogram
592 @param mask boolean numpy.array defining which events are used for the histogram
593 @param weight_column column in data containing the weights for each event
596 mask = numpy.ones(len(data)).astype(
'bool')
599 if self.keep_first_binning
and self.first_binning
is not None:
600 bins = self.first_binning
602 bins=bins, equal_frequency=
False, range_in_std=self.range_in_std)
603 if self.keep_first_binning
and self.first_binning
is None:
604 self.first_binning = hists.bins
605 hist, hist_error = hists.get_hist(
'Total')
606 self.binWidth = hists.bin_widths[1]
608 if self.normed_to_all_entries:
609 normalization = float(numpy.sum(hist))
610 hist = hist / normalization
611 hist_error = hist_error / normalization
613 if self.normed_to_bin_width:
614 hist = hist / hists.bin_widths
615 hist_error = hist_error / hists.bin_widths
617 self.xmin, self.xmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmax)
618 self.ymin, self.ymax = numpy.nanmin([hist.min(), self.ymin]), numpy.nanmax([(hist + hist_error).max(), self.ymax])
620 p = self._plot_datapoints(self.axis, hists.bin_centers, hist, xerr=hists.bin_widths / 2, yerr=hist_error)
622 self.x_axis_label = column
624 self.labels.append(column)
626 self.labels.append(label)
631 Sets limits, title, axis-labels and legend of the plot
634 self.axis.set_xlim((self.xmin, self.xmax))
637 self.axis.set_yscale(
'log', nonposy=
'clip')
639 self.axis.set_ylim((self.ymin, self.ymax))
640 self.binWidth =
'{:8.2f}'.format(self.binWidth)
643 self.axis.get_xaxis().set_label_text(self.x_axis_label)
644 if self.normed_to_all_entries
and self.normed_to_bin_width:
645 self.axis.get_yaxis().set_label_text(
r'# Entries per Bin / (# Entries * Bin Width)')
646 elif self.normed_to_all_entries:
648 self.axis.get_yaxis().set_label_text(
649 r'{$\frac{\rm Entries\hspace{0.25em} per\hspace{0.25em} Bin}{\rm Entries}\, /\, (' +
650 self.binWidth +
r'\,)$}', fontsize=65)
651 self.axis.get_yaxis().labelpad = 20
652 self.axis.get_yaxis().set_tick_params(labelsize=60)
653 elif self.normed_to_bin_width:
654 self.axis.get_yaxis().set_label_text(
r'# Entries per Bin / Bin Width')
656 self.axis.get_yaxis().set_label_text(
r'# Entries per Bin')
def __init__(self, figure=None, axis=None):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    """
    super().__init__(figure=figure, axis=axis)

    # Label of the x axis; empty until a plot has been added.
    self.x_axis_label = ""
679 def add(self, data, column, mask=None, weight_column=None):
681 Add a new boxplot to the plots
682 @param data pandas.DataFrame containing all data
683 @param column which is used to calculate boxplot quantities
684 @param mask boolean numpy.array defining which events are used for the histogram
685 @param weight_column column in data containing the weights for each event
688 mask = numpy.ones(len(data)).astype(
'bool')
689 x = data[column][mask]
690 if weight_column
is not None:
692 B2WARNING(
"Weights are currently not used in boxplot, due to limitations in matplotlib")
695 B2WARNING(
"Ignore empty boxplot.")
698 p = self.axis.boxplot(x, sym=
'k.', whis=1.5, vert=
False, patch_artist=
True, showmeans=
True, widths=1,
699 boxprops=dict(facecolor=
'blue', alpha=0.5),
704 self.labels.append(column)
705 self.x_axis_label = column
def finish(self):
    """
    Sets limits, title, axis-labels and legend of the plot
    @return self, so that calls can be chained
    """
    # The y axis is hidden for the box plot.
    matplotlib.artist.setp(self.axis.get_yaxis(), visible=False)
    self.axis.get_xaxis().set_label_text(self.x_axis_label)
    self.axis.set_title("Box Plot")
    return self
728 class Difference(Plotter):
730 Plots the difference between two histograms
def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed normalize minuend and subtrahend before comparing them
    @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
    """
    super(Difference, self).__init__(figure, axis)
    # Keep the documented option instead of discarding it.
    self.normed = normed
    self.shift_to_zero = shift_to_zero
763 def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None):
765 Add a new difference plot
766 @param data pandas.DataFrame containing all data
767 @param column which is used to calculate distribution histogram
768 @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
769 @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
770 @param weight_column column in data containing the weights for each event
771 @param label label for the legend if None, the column name is used
774 hists =
histogram.Histograms(data, column, {
'Minuend': minuend_mask,
'Subtrahend': subtrahend_mask}, bins=bins,
775 weight_column=weight_column, equal_frequency=
False)
776 minuend, minuend_error = hists.get_hist(
'Minuend')
777 subtrahend, subtrahend_error = hists.get_hist(
'Subtrahend')
781 difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
782 minuend = minuend / numpy.sum(minuend)
783 subtrahend = subtrahend / numpy.sum(subtrahend)
784 difference = minuend - subtrahend
786 if self.shift_to_zero:
787 difference = difference - numpy.mean(difference)
789 self.xmin, self.xmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmax)
790 self.ymin = min((difference - difference_error).min(), self.ymin)
791 self.ymax = max((difference + difference_error).max(), self.ymax)
793 p = self._plot_datapoints(self.axis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
796 self.labels.append(label)
798 self.labels.append(column)
799 self.x_axis_label = column
def finish(self, line_color='black'):
    """
    Sets limits, title, axis-labels and legend of the plot
    @param line_color color of the horizontal reference line drawn at zero
    @return self, so that calls can be chained
    """
    # Horizontal reference line at zero difference.
    self.axis.plot((self.xmin, self.xmax), (0, 0), color=line_color, linewidth=4)

    self.axis.set_xlim((self.xmin, self.xmax))
    self.axis.set_ylim((self.ymin, self.ymax))
    self.axis.set_title("Difference Plot")
    self.axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
    self.axis.get_xaxis().set_label_text(self.x_axis_label)
    self.axis.set_ylabel(r'{\rm Difference}', fontsize=40, labelpad=20)
    self.axis.get_xaxis().grid(True)
    return self
819 class normalizedResiduals(Plotter):
821 Plots the difference between two histograms
def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed normalize minuend and subtrahend before comparing them
    @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
    """
    super(normalizedResiduals, self).__init__(figure, axis)
    # Keep the documented option instead of discarding it.
    self.normed = normed
    self.shift_to_zero = shift_to_zero
856 def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None, bins=50, isNN=False):
858 Add a new difference plot
859 @param data pandas.DataFrame containing all data
860 @param column which is used to calculate distribution histogram
861 @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
862 @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
863 @param weight_column column in data containing the weights for each event
864 @param label label for the legend if None, the column name is used
867 hists =
histogram.Histograms(data, column, {
'Minuend': minuend_mask,
'Subtrahend': subtrahend_mask}, bins=bins,
868 weight_column=weight_column, equal_frequency=
False)
869 minuend, minuend_error = hists.get_hist(
'Minuend')
870 subtrahend, subtrahend_error = hists.get_hist(
'Subtrahend')
872 print(
"Here BinWidths Norm", hists.bin_widths)
876 difference_error = numpy.sqrt((minuend_error / numpy.sum(minuend))**2 + (subtrahend_error / numpy.sum(subtrahend))**2)
877 minuend = minuend / numpy.sum(minuend)
878 subtrahend = subtrahend / numpy.sum(subtrahend)
879 difference = minuend - subtrahend
880 normalizedRes = (minuend - subtrahend) / difference_error
882 if self.shift_to_zero:
883 difference = difference - numpy.mean(difference)
889 self.xmin = float(-1.0)
891 self.xmin, self.xmax = self.xmin, self.xmax
893 p = self._plot_datapoints(self.axis, hists.bin_centers, normalizedRes, xerr=hists.bin_widths / 2, yerr=1)
896 self.labels.append(label)
898 self.labels.append(column)
899 self.x_axis_label = column
def finish(self, line_color='black'):
    """
    Sets limits, title, axis-labels and legend of the plot
    @param line_color accepted for signature compatibility, not used by this plot
    @return self, so that calls can be chained
    """
    self.axis.set_xlim((self.xmin, self.xmax))
    # Fixed y range of +-5.
    self.axis.set_ylim((-5, 5))
    self.axis.set_title("Difference Plot")
    self.axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
    self.axis.get_xaxis().set_label_text(self.x_axis_label)
    self.axis.set_ylabel(r'${\rm Normalized}$' + '\n' + r'${\rm Residuals}$', fontsize=40, labelpad=20)
    # A tick at every integer, but only the even ones between -4 and 4 get a label.
    self.axis.get_yaxis().set_ticks([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
    self.axis.get_yaxis().set_ticklabels([r'', r'$-4$', r'', r'$-2$', r'', r'$0$', r'', r'$2$', r'', r'$4$', r''],
                                         fontsize=45)
    self.axis.get_xaxis().grid(True)

    # Horizontal guide lines at +-3 (green) and +-1 (blue).
    for level, guide_color in ((3, '#006600'), (1, 'b'), (-1, 'b'), (-3, '#006600')):
        self.axis.plot((self.xmin, self.xmax), (level, level), linewidth=4, color=guide_color, linestyle='-')
    return self
927 class Overtraining(Plotter):
929 Create TMVA-like overtraining control plot for a classification training
def __init__(self, figure=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    """
    if figure is None:
        self.figure = matplotlib.figure.Figure(figsize=(32, 18))
        self.figure.set_tight_layout(True)
    else:
        self.figure = figure

    # Five grid rows: the top three hold the main plot, the two rows below
    # hold one additional axis each, both sharing the x axis of the main plot.
    gs = matplotlib.gridspec.GridSpec(5, 1)
    self.axis = self.figure.add_subplot(gs[:3, :])
    self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
    self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)

    super(Overtraining, self).__init__(self.figure, self.axis)
959 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None, bkgrOutput=0, isNN=False):
961 Add a new overtraining plot, I recommend to raw only one overtraining plot at the time,
962 otherwise there are too many curves in the plot to recognize anything in the plot.
963 @param data pandas.DataFrame containing all data
964 @param column which is used to calculate distribution histogram
965 @param train_mask boolean numpy.array defining which events are training events
966 @param test_mask boolean numpy.array defining which events are test events
967 @param signal_mask boolean numpy.array defining which events are signal events
968 @param bckgrd_mask boolean numpy.array defining which events are background events
969 @param weight_column column in data containing the weights for each event
971 distribution = Distribution(self.figure, self.axis, normed_to_all_entries=
True)
976 bins = list(range(-51, 55, 1))
977 for i
in range(0, len(bins)):
978 bins[i] = float(bins[i]) / 25
980 bins = list(range(-51, 55, 1))
981 for i
in range(0, len(bins)):
982 bins[i] = float(bins[i]) / 50
985 distribution.logScale =
True
986 distribution.labels = [
r'{\rm Test-Bkgr.}',
r'{\rm Train-Bkgr.}',
r'{\rm Test-Signal}',
r'{\rm Train-Signal}']
988 distribution.labels = [
989 r'{\rm Test-$\bar{B}^{0}$}',
990 r'{\rm Train-$\bar{B}^{0}$}',
991 r'{\rm Test-$B^{0}$}',
992 r'{\rm Train-$B^{0}$}']
994 distribution.set_plot_options(self.plot_kwargs)
996 distribution.set_errorbar_options({
'fmt':
'o',
'elinewidth': 5,
'alpha': 1,
'markersize': 20,
'ecolor':
'w'})
998 distribution.set_errorband_options(
None)
999 distribution.add(data, column, test_mask & bckgrd_mask, weight_column,
None, bins)
1000 distribution.add(data, column, test_mask & signal_mask, weight_column,
None, bins)
1002 distribution.set_errorbar_options(
None)
1004 distribution.set_plot_options({
'color': distribution.plots[0][0][0].get_color(
1005 ),
'drawstyle':
'steps-mid',
'linestyle':
'dashed',
'lw': 5})
1006 distribution.set_fill_options(
None)
1007 distribution.add(data, column, train_mask & bckgrd_mask, weight_column,
None, bins)
1008 distribution.set_plot_options({
'color': distribution.plots[1][0][0].get_color(
1009 ),
'drawstyle':
'steps-mid',
'linestyle':
'solid',
'lw': 5})
1011 distribution.add(data, column, train_mask & signal_mask, weight_column,
None, bins)
1013 distribution.finish()
1015 p1 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1017 fmt=
'o', mfc=distribution.plots[0][0][0].get_color(),
1018 mec=distribution.plots[0][0][0].get_color(), markersize=20, label=
r'${\rm Test-Bkgr.}$')
1019 p2, = distribution.axis.plot([], label=
r'${\rm Train-Bkgr.}$', linewidth=5,
1020 linestyle=
'dashed', c=distribution.plots[0][0][0].get_color())
1021 p3 = distribution.axis.errorbar([], [], xerr=0, yerr=0, elinewidth=5, mew=2, ecolor=
'w',
1023 fmt=
'o', mfc=distribution.plots[1][0][0].get_color(),
1024 mec=distribution.plots[1][0][0].get_color(), markersize=20, label=
r'${\rm Test-Signal}$')
1025 p4, = distribution.axis.plot([], label=
r'${\rm Train-Signal}$', linewidth=5,
1026 linestyle=
'solid', alpha=0.9, c=distribution.plots[1][0][0].get_color())
1028 distribution.axis.legend([p1, p2, p3, p4], distribution.labels, loc=
'best', fancybox=
True, framealpha=0.5, fontsize=60)
1030 self.plot_kwargs[
'color'] = distribution.plots[0][0][0].get_color()
1031 difference_bckgrd = normalizedResiduals(self.figure, self.axis_d1, shift_to_zero=
True, normed=
True)
1032 difference_bckgrd.set_plot_options(self.plot_kwargs)
1033 difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
1034 difference_bckgrd.set_errorband_options(self.errorband_kwargs)
1035 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column,
None, bins, isNN)
1036 self.axis_d1.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
1037 self.axis_d1.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
1039 difference_bckgrd.plots = difference_bckgrd.labels = []
1040 difference_bckgrd.finish(line_color=distribution.plots[0][0][0].get_color())
1042 self.plot_kwargs[
'color'] = distribution.plots[1][0][0].get_color()
1043 difference_signal = normalizedResiduals(self.figure, self.axis_d2, shift_to_zero=
True, normed=
True)
1044 difference_signal.set_plot_options(self.plot_kwargs)
1045 difference_signal.set_errorbar_options(self.errorbar_kwargs)
1046 difference_signal.set_errorband_options(self.errorband_kwargs)
1047 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column,
None, bins, isNN)
1048 self.axis_d2.set_xlim((difference_signal.xmin, difference_signal.xmax))
1049 self.axis_d2.set_ylim((difference_signal.ymin, difference_signal.ymax))
1050 difference_signal.plots = difference_signal.labels = []
1051 difference_signal.finish(line_color=distribution.plots[1][0][0].get_color())
def finish(self, xLabel=r'${\rm Classifier\ Output}$'):
    """
    Sets limits, title, axis-labels and legend of the plot
    @param xLabel text used as x-axis label of axis_d2 (rendered with fontsize 85)
    """
    # Both additional panels are drawn without a title.
    self.axis_d1.set_title("")
    self.axis_d2.set_title("")

    # Only axis_d2 keeps its x tick labels and its x-axis label;
    # on the two other axes they are hidden / emptied.
    for upper_axis in (self.axis, self.axis_d1):
        matplotlib.artist.setp(upper_axis.get_xticklabels(), visible=False)
        upper_axis.get_xaxis().set_label_text('')

    bottom_xaxis = self.axis_d2.get_xaxis()
    bottom_xaxis.set_label_text(xLabel, fontsize=85)
    bottom_xaxis.labelpad = 20
    bottom_xaxis.set_tick_params(labelsize=60)

    # NOTE(review): the lines following the last statement are elided in the
    # reviewed text; presumably the method ends with `return self` - confirm.
    return self
class VerboseDistribution(Plotter):
    """
    Plots distribution of a quantity including boxplots
    """

    def __init__(self, figure=None, axis=None, normed=False, range_in_std=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param normed true if the histograms should be normed before drawing
        @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
        """
        super(VerboseDistribution, self).__init__(figure, axis)
        # Passed on to the Distribution plotter as normed_to_all_entries
        self.normed = normed
        # Half-width of the shown window in units of the standard deviation, None disables the window
        self.range_in_std = range_in_std
        # One axis per box plot; add() reads its length and appends to it
        self.box_axes = []
        # Plotter drawing the histograms on the main axis
        self.distribution = Distribution(self.figure, self.axis,
                                         normed_to_all_entries=self.normed, range_in_std=self.range_in_std)

    def add(self, data, column, mask=None, weight_column=None, label=None):
        """
        Add a new distribution plot, with additional information like a boxplot compared to
        the ordinary Distribution plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param mask boolean numpy.array defining which events are used for the distribution histogram
        @param weight_column column in data containing the weights for each event
        @param label forwarded to Distribution.add as label
        """
        self.distribution.set_plot_options(self.plot_kwargs)
        self.distribution.set_errorbar_options(self.errorbar_kwargs)
        self.distribution.set_errorband_options(self.errorband_kwargs)
        self.distribution.add(data, column, mask, weight_column, label=label)

        # The main axis takes the upper three quarters of the grid, the n box
        # plots (including the new one) share the lower quarter, one row each.
        n = len(self.box_axes) + 1
        gs = matplotlib.gridspec.GridSpec(4 * n, 1)
        gridspecs = [gs[:3 * n, :]]
        gridspecs.extend(gs[3 * n + i, :] for i in range(n))
        box_axis = self.add_subplot(gridspecs)

        if self.range_in_std is not None:
            # NOTE(review): this call is elided in the reviewed text; it is restored from the
            # names mean/std used below and the helper signature weighted_mean_and_std(x, w).
            mean, std = histogram.weighted_mean_and_std(data[column],
                                                        None if weight_column is None else data[weight_column])
            values = data[column]
            window = (values > (mean - self.range_in_std * std)) & (values < (mean + self.range_in_std * std))
            # mask defaults to None; `None & window` is not a valid selection,
            # so without a mask the window alone defines the selection.
            mask = window if mask is None else mask & window

        box = Box(self.figure, box_axis)
        box.add(data, column, mask, weight_column)
        if len(box.plots) > 0:
            # Give the box the color of the histogram that was just drawn
            box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())

        self.box_axes.append(box_axis)
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        self.distribution.finish()
        matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
        self.axis.get_xaxis().set_label_text('')

        # Every box axis except the last one loses tick labels, title and x label
        for box_axis in self.box_axes[:-1]:
            matplotlib.artist.setp(box_axis.get_xticklabels(), visible=False)
            box_axis.set_title("")
            box_axis.get_xaxis().set_label_text('')
        # Guard against finish() being called before any add(): there is no last box axis then
        if self.box_axes:
            self.box_axes[-1].set_title("")

        self.axis.set_title("Distribution Plot")
        handles = [plot[0] for plot in self.distribution.plots]
        self.axis.legend(handles, self.distribution.labels,
                         loc='best', fancybox=True, framealpha=0.5)
        return self
class Correlation(Plotter):
    """
    Plots change of a distribution of a quantity depending on the cut on a classifier
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Three rows spanning both grid columns; the lower two share the x axis of the first
        gs = matplotlib.gridspec.GridSpec(3, 2)
        self.axis = self.figure.add_subplot(gs[0, :])
        self.axis_d1 = self.figure.add_subplot(gs[1, :], sharex=self.axis)
        self.axis_d2 = self.figure.add_subplot(gs[2, :], sharex=self.axis)

        super(Correlation, self).__init__(self.figure, self.axis)

    def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param cut_column which is used to calculate cut on the other quantity defined by column
        @param quantiles list of quantiles between 0 and 100, defining the different cuts
               (not read by this method: the cuts are fixed to 5, 10, ..., 95)
        @param signal_mask boolean mask selecting the events drawn on axis_d1
        @param bckgrd_mask boolean mask selecting the events drawn on axis_d2
        @param weight_column column in data containing the weights for each event

        Both masks are combined with `|` and used as indexers, so although they
        default to None, both have to be given.
        """
        if len(data[cut_column]) == 0:
            B2WARNING("Ignore empty Correlation.")
            return self

        colormap = plt.get_cmap('coolwarm')
        panels = [
            (self.axis, '.', signal_mask | bckgrd_mask),
            (self.axis_d1, 'S', signal_mask),
            (self.axis_d2, 'B', bckgrd_mask),
        ]

        for axis, tag, mask in panels:
            values = data[column][mask]
            cut_values = data[cut_column][mask]

            if weight_column is not None:
                weights = numpy.array(data[weight_column][mask])
            else:
                weights = numpy.ones(len(values))

            # Histogram range: 5th to 95th percentile of the selected values
            value_range = np.percentile(values, [5, 95]).astype(np.float32)

            # `density=True` replaces the `normed=True` keyword, which newer NumPy no longer accepts
            previous, edges = np.histogram(values, bins=100,
                                           range=value_range, density=True, weights=weights)
            bin_center = (edges[1:] + edges[:-1]) / 2
            axis.plot(bin_center, previous, color='black', lw=1)

            for quantil in np.arange(5, 100, 5):
                cut = np.percentile(cut_values, quantil)
                selection = numpy.asarray(cut_values >= cut)
                current, edges = np.histogram(values[selection], bins=100,
                                              range=value_range, density=True, weights=weights[selection])
                bin_center = (edges[1:] + edges[:-1]) / 2
                axis.fill_between(bin_center, previous, current, color=colormap(quantil / 100.0))
                # NOTE(review): one short line is elided here in the reviewed text; presumably
                # it makes the next band start at this histogram (`tmp = y`) - confirm.
                previous = current

            axis.set_ylim(bottom=0)

            # NOTE(review): the line computing flatness_score is elided in the reviewed text.
            # Restored from the helper signature calculate_flatness(f, p, w=None); the module
            # qualifier basf2_mva_util is an assumption - confirm against the file's imports.
            flatness_score = basf2_mva_util.calculate_flatness(values, cut_values, weights)
            axis.set_title(
                r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(tag, flatness_score))
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        return self
class TSNE(Plotter):
    """
    Plots multivariate distribution using TSNE algorithm
    """

    def add(self, data, columns, *masks):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param masks different classes to show in TSNE
        """
        try:
            import sklearn
            import sklearn.manifold
        except ImportError:
            print("Cannot create TSNE plot. Install sklearn if you want it")
            return self

        model = sklearn.manifold.TSNE(n_components=2, random_state=0)
        # Keep `data` untouched: it is still needed as a DataFrame, the feature
        # matrix (one row per event, one column per entry of columns) gets its own name.
        features = numpy.array([data[column] for column in columns]).T
        # sklearn's TSNE only offers fit_transform (there is no separate transform step),
        # so all events are embedded once and the masks select rows of the embedding.
        embedded = model.fit_transform(features)
        for mask in masks:
            selected = embedded[numpy.asarray(mask, dtype=bool)]
            self.axis.scatter(selected[:, 0], selected[:, 1])
        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        return self
class Importance(Plotter):
    """
    Plots importance matrix
    """

    def add(self, data, columns, variables, displayHeatMap):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param variables names used as y tick labels, one per row of data
        @param displayHeatMap not referenced in the visible code; see the note at the colorbar
        """
        self.figure.set_tight_layout(True)

        def norm(x):
            """Rescale x linearly to the range 0..100; a constant x maps to zeros."""
            lowest = numpy.min(x)
            width = numpy.max(x) - lowest
            if width <= 0:
                return numpy.zeros(x.shape)
            return (x - lowest) / width * 100

        importance_matrix = numpy.vstack([norm(data[column]) for column in columns]).T
        n_rows, n_columns = importance_matrix.shape

        new_RdBu = truncate_colormap(plt.get_cmap('RdBu'), 0.5, 0.85)

        # Pair every matrix entry with the label of its row and sort the pairs by value.
        # The numpy conversion turns both entries of a pair into strings.
        labels = list(variables)
        labelsValues = np.array(sorted(
            [importance_matrix[y, x], labels[y]]
            for y in range(n_rows)
            for x in range(n_columns)))

        # The heat map shows every column sorted independently
        arrayToSort = np.array(np.sort(importance_matrix, axis=0))
        importance_heatmap = self.axis.pcolor(arrayToSort, cmap=new_RdBu, vmin=0, vmax=100)

        # Ticks in the middle of each cell
        self.axis.set_yticks(numpy.arange(n_rows) + 0.5, minor=False)
        self.axis.set_xticks(numpy.arange(n_columns) + 0.5, minor=False)

        self.axis.set_xticklabels(columns, minor=False, rotation=90)

        # Fewer than six entries get an explicit, larger label size
        if labelsValues.shape[0] < 6:
            self.axis.set_yticklabels(labelsValues[:, 1], minor=False, size=58)
        else:
            self.axis.set_yticklabels(labelsValues[:, 1], minor=False)

        self.axis.set_xticklabels([''])

        # The values are written into the last column only. The original relied on the
        # loop variable `x` leaking out of the pairing loop above, which is this same index.
        last_column = n_columns - 1
        for y in range(labelsValues.shape[0]):
            # NOTE(review): one line inside this call is elided in the reviewed text
            # (possibly a text size argument) - restore it from the full file.
            self.axis.text(last_column + 0.5, y + 0.5, r'$%.0f$' % float(labelsValues[y][0]),
                           horizontalalignment='center',
                           verticalalignment='center')

        # NOTE(review): the line before the colorbar is elided in the reviewed text and
        # displayHeatMap is otherwise unused; guarding the colorbar with it is an assumption.
        if displayHeatMap:
            cb = self.figure.colorbar(importance_heatmap, ticks=[2, 98], orientation='vertical')
            cb.ax.tick_params(length=0)
            cb.ax.set_yticklabels([r'${\rm low}$', r'${\rm high}$'], size=60)

        self.axis.set_aspect('equal')

        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        return self
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """
    Creates a colormap from a sub-range of an existing colormap
    @param cmap colormap which is sampled; it is called with values in [minval, maxval] and its name is read
    @param minval lower end of the sampled range
    @param maxval upper end of the sampled range
    @param n number of equally spaced samples taken between minval and maxval
    @return the new matplotlib.colors.LinearSegmentedColormap built from the sampled colors
    """
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    # Callers pass the result on as `cmap=`, so the new colormap has to be returned
    return matplotlib.colors.LinearSegmentedColormap.from_list(name, sampled_colors)
class CorrelationMatrix(Plotter):
    """
    Plots correlation matrix
    """

    def __init__(self, figure=None):
        """
        Creates a new figure if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        """
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(38, 24))
            self.figure.set_tight_layout(True)
        else:
            self.figure = figure

        # Rows 0-13 hold the two matrices side by side, row 15 the colorbar
        gs = matplotlib.gridspec.GridSpec(16, 2)
        self.signal_axis = self.figure.add_subplot(gs[:14, 0])
        self.bckgrd_axis = self.figure.add_subplot(gs[:14, 1], sharey=self.signal_axis)
        self.colorbar_axis = self.figure.add_subplot(gs[15, :])
        # The signal axis doubles as the default axis of the Plotter
        self.axis = self.signal_axis

        super(CorrelationMatrix, self).__init__(self.figure, self.axis)

    @staticmethod
    def _clear_negative_zeros(corr):
        """Set every entry that would be printed as '-0' to 0 (in place)."""
        for y in range(corr.shape[0]):
            for x in range(corr.shape[1]):
                if '%.0f' % corr[y, x] == '-0':
                    corr[y, x] = 0

    @staticmethod
    def _annotate_cells(axis, corr):
        """Write the rounded value of every entry of corr into the middle of its cell on axis."""
        many_rows = corr.shape[0] > 24
        for y in range(corr.shape[0]):
            for x in range(corr.shape[1]):
                value = corr[y, x]
                if many_rows and value < 0:
                    # For more than 24 rows the minus sign is kept outside of the math text
                    text = '-' + r'$%.0f$' % abs(value)
                else:
                    text = r'$%.0f$' % value
                # NOTE(review): one line inside each original text() call is elided in the
                # reviewed text (possibly a text size argument) - restore it from the full file.
                axis.text(x + 0.5, y + 0.5, text,
                          horizontalalignment='center',
                          verticalalignment='center')

    def add(self, data, columns, signal_mask, bckgrd_mask, bkgrOutput):
        """
        Add a new correlation plot.
        @param data pandas.DataFrame containing all data
        @param columns which are used to calculate the correlations
        @param signal_mask boolean mask selecting the events of the left matrix
        @param bckgrd_mask boolean mask selecting the events of the right matrix
        @param bkgrOutput if -1 the matrices are captioned with the B0 / anti-B0 labels,
               otherwise with Signal / Background
        """
        # Correlations in percent; atleast_2d keeps a single column usable as a 1x1 matrix
        signal_corr = numpy.atleast_2d(
            numpy.corrcoef(numpy.vstack([data[column][signal_mask] for column in columns])) * 100)
        bckgrd_corr = numpy.atleast_2d(
            numpy.corrcoef(numpy.vstack([data[column][bckgrd_mask] for column in columns])) * 100)

        # Reverse the column order of both matrices (independent copies)
        mirrored_signal_corr = numpy.array(signal_corr[:, ::-1], dtype=float)
        mirrored_bckgrd_corr = numpy.array(bckgrd_corr[:, ::-1], dtype=float)

        new_RdBu = truncate_colormap(plt.get_cmap('RdBu'), 0.15, 0.85)
        signal_heatmap = self.signal_axis.pcolor(mirrored_signal_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)
        # NOTE(review): the lines following the signal heat map are elided in the reviewed text;
        # the matching heat map on the background axis is assumed to be among them - confirm.
        self.bckgrd_axis.pcolor(mirrored_bckgrd_corr, cmap=new_RdBu, vmin=-100.0, vmax=100.0)

        self._clear_negative_zeros(mirrored_signal_corr)
        self._clear_negative_zeros(mirrored_bckgrd_corr)

        # NOTE(review): bckgrd_axis shares its y axis with signal_axis, so the two
        # invert_yaxis calls presumably act on the same limits - verify the resulting orientation.
        self.signal_axis.invert_yaxis()
        self.signal_axis.xaxis.tick_top()
        self.bckgrd_axis.invert_yaxis()
        self.bckgrd_axis.xaxis.tick_top()

        # Ticks in the middle of each cell
        self.signal_axis.set_xticks(numpy.arange(mirrored_signal_corr.shape[0]) + 0.5, minor=False)
        self.signal_axis.set_yticks(numpy.arange(mirrored_signal_corr.shape[1]) + 0.5, minor=False)
        self.bckgrd_axis.set_xticks(numpy.arange(mirrored_bckgrd_corr.shape[0]) + 0.5, minor=False)
        self.bckgrd_axis.set_yticks(numpy.arange(mirrored_bckgrd_corr.shape[1]) + 0.5, minor=False)

        # x labels follow the reversed column order; matrices with fewer than 8 rows get size 58
        label_size = {'size': 58} if mirrored_signal_corr.shape[0] < 8 else {}
        reversed_columns = list(reversed(columns))
        for axis in (self.bckgrd_axis, self.signal_axis):
            axis.set_xticklabels(reversed_columns, minor=False, rotation=90, **label_size)
            axis.set_yticklabels(columns, minor=False, **label_size)

        # Each matrix is annotated on its own axis. The original wrote the negative background
        # values of matrices with more than 24 rows onto the signal axis.
        self._annotate_cells(self.signal_axis, mirrored_signal_corr)
        self._annotate_cells(self.bckgrd_axis, mirrored_bckgrd_corr)

        cb = self.figure.colorbar(signal_heatmap, cax=self.colorbar_axis, ticks=[-92.3, 0, 92.5],
                                  orientation='horizontal')
        cb.ax.tick_params(length=0)
        cb.ax.set_xticklabels([r'${\rm negative}$', r'${\rm uncorrelated}$', r'${\rm positive}$'], fontsize=60)

        if bkgrOutput == -1:
            self.figure.text(0.30, 0.11, r'$B^0\,(q_{\rm MC} = +1)$', horizontalalignment='center', size=65)
            self.figure.text(0.74, 0.11, r'$\bar{B}^0\,(q_{\rm MC} = -1)$', horizontalalignment='center', size=65)
        else:
            self.figure.text(0.27, 0.115, r'${\rm Signal}$', horizontalalignment='center', size=65)
            self.figure.text(0.73, 0.115, r'${\rm Background}$', horizontalalignment='center', size=65)

        return self

    def finish(self):
        """
        Sets limits, title, axis-labels and legend of the plot
        """
        # The y tick labels are only shown on the signal axis
        matplotlib.artist.setp(self.bckgrd_axis.get_yticklabels(), visible=False)
        return self
if __name__ == '__main__':
    # Demo: draws every plot type once from random data and writes the images to the working directory.
    # NOTE(review): the plotter constructions and finish() calls that are elided in the reviewed text
    # (Box, Diagonal, Distribution, Difference, Overtraining, Correlation) are restored from the
    # output file names - confirm against the full file.

    def get_data(N, columns):
        """
        Creates fake data for example plots
        @param N total number of rows; one half is drawn around 0, the other half around 1
        @param columns column names, the last one receives the 0/1 target
        """
        # Integer division: the value is used as an array size
        half = N // 2
        n = len(columns) - 1
        xs = numpy.random.normal(0, size=(half, n))
        xb = numpy.random.normal(1, size=(half, n))
        ys = numpy.zeros(half)
        yb = numpy.ones(half)
        data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
        # Shuffle the rows
        return data.reindex(numpy.random.permutation(data.index))

    import seaborn
    seaborn.set(font_scale=3)
    seaborn.set_style('whitegrid')

    N = 100000
    data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
    # The first N // 2 rows (by position) are the training sample, the rest the test sample.
    # Written as one column assignment: no float slice bound and no chained assignment.
    data['type'] = numpy.where(numpy.arange(len(data)) < N // 2, 'Train', 'Test')

    is_signal = data['isSignal'] == 1
    is_bckgrd = data['isSignal'] == 0
    is_train = data['type'] == 'Train'
    is_test = data['type'] == 'Test'

    p = Box()
    p.add(data, 'FastBDT')
    p.finish()
    p.save('box_plot.png')

    p = VerboseDistribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('verbose_distribution_plot.png')

    p = PurityOverEfficiency()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_purity_plot.png')

    p = RejectionOverEfficiency()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_rejection_plot.png')

    p = Diagonal()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('diagonal_plot.png')

    p = Distribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('distribution_plot.png')

    p = Difference()
    p.add(data, 'FastBDT', is_train, is_test)
    p.add(data, 'NeuroBayes', is_train, is_test)
    p.finish()
    p.save('difference_plot.png')

    p = Overtraining()
    p.add(data, 'FastBDT', is_train, is_test, is_signal, is_bckgrd)
    p.finish()
    p.save('overtraining_plot.png')

    p = Correlation()
    # Correlation.add combines both masks, so both are passed (the original passed only one)
    p.add(data, 'FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], is_signal, is_bckgrd)
    p.finish()
    p.save('correlation_plot.png')

    p = CorrelationMatrix()
    data['FastBDT2'] = data['FastBDT']**2
    data['NeuroBayes2'] = data['NeuroBayes']**2
    data['FastBDT3'] = data['FastBDT']**3
    data['NeuroBayes3'] = data['NeuroBayes']**3
    # CorrelationMatrix.add requires both masks and bkgrOutput; any value other than -1
    # selects the Signal / Background captions.
    p.add(data, ['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'],
          is_signal, is_bckgrd, 0)
    p.finish()
    p.save('correlation_matrix.png')
def calculate_flatness(f, p, w=None)
def weighted_mean_and_std(x, w)