Belle II Software development
plotting.py
1#!/usr/bin/env python3
2
3
4
11
12import copy
13import math
14
15import pandas
16import numpy
17import itertools
18import matplotlib.pyplot as plt
19import matplotlib.artist
20import matplotlib.figure
21import matplotlib.gridspec
22import matplotlib.colors
23import matplotlib.patches
24import matplotlib.ticker
25import matplotlib.patheffects as PathEffects
26
27
28from basf2_mva_evaluation import histogram
29
30import basf2 as b2
31
32import basf2_mva_util
33import matplotlib
34
35# Do not use standard backend TkAgg, because it is NOT thread-safe
36# You will get an RuntimeError: main thread is not in main loop otherwise!
37matplotlib.use("svg")
38
39# Use the Belle II style while producing the plots
40plt.style.use("belle2")
41
42
class Plotter:
    """
    Base class for all Plotters.

    A Plotter owns a matplotlib figure and axis, collects the artists and
    legend labels of everything drawn so far and keeps track of the axis
    limits. Subclasses implement add() and finish().
    """

    #: artists of the curves added so far (one tuple per curve, see _plot_datapoints)
    plots = None
    #: legend labels, parallel to plots
    labels = None
    #: lower x-axis limit
    xmin = None
    #: upper x-axis limit
    xmax = None
    #: lower y-axis limit
    ymin = None
    #: upper y-axis limit
    ymax = None
    #: relative margin applied to the y limits by scale_limits
    yscale = 0.0
    #: relative margin applied to the x limits by scale_limits
    xscale = 0.0
    #: figure which is drawn into
    figure = None
    #: axis which is drawn into
    axis = None
    #: keyword arguments for axis.plot, None disables the plot call
    plot_kwargs = None
    #: keyword arguments for axis.errorbar, None disables the errorbars
    errorbar_kwargs = None
    #: keyword arguments for the error band, None disables the error band
    errorband_kwargs = None
    #: keyword arguments for the histogram fill, None disables the fill
    fill_kwargs = None

    def __init__(self, figure=None, axis=None, dpi=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param dpi dpi for the matplotlib figure, if None default is used
        """
        b2.B2INFO("Create new figure for class " + str(type(self)))

        self.dpi = dpi
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=dpi)
        else:
            self.figure = figure

        if axis is None:
            self.axis = self.figure.add_subplot(1, 1, 1)
        else:
            self.axis = axis

        self.plots = []
        self.labels = []
        self.xmin, self.xmax = float(0), float(1)
        self.ymin, self.ymax = float(0), float(1)
        self.yscale = 0.1
        self.xscale = 0.0

        self.plot_kwargs = None
        self.errorbar_kwargs = None
        self.errorband_kwargs = None
        self.fill_kwargs = None

        # Apply the defaults of every option group, so that _plot_datapoints
        # never meets an attribute which was not initialised.
        self.set_plot_options()
        self.set_errorbar_options()
        self.set_errorband_options()
        self.set_fill_options()

        # Endless iterator over the colours (and other properties) of the active style
        self.prop_cycler = itertools.cycle(plt.rcParams["axes.prop_cycle"])

    def add_subplot(self, gridspecs):
        """
        Adds a new subplot to the figure, updates all other axes
        according to the given gridspec
        @param gridspecs gridspecs for all axes including the new one
        @return the newly created axis, which shares its x-axis with self.axis
        """
        for gs, ax in zip(gridspecs[:-1], self.figure.axes):
            ax.set_position(gs.get_position(self.figure))
            ax.set_subplotspec(gs)
        axis = self.figure.add_subplot(gridspecs[-1], sharex=self.axis)
        return axis

    def save(self, filename):
        """
        Save the figure into a file
        @param filename of the file
        @return self, to allow call chaining
        """
        b2.B2INFO("Save figure for class " + str(type(self)))
        # Imported here so that the Agg canvas is only loaded when a figure is written
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        canvas = FigureCanvas(self.figure)
        canvas.print_figure(filename, dpi=self.dpi, bbox_inches='tight')
        return self

    # NOTE: the dict defaults of the set_*_options methods are never mutated,
    # every setter stores a copy. They cannot be replaced by None, because
    # None is a meaningful value which switches the corresponding artist off.

    def set_plot_options(self, plot_kwargs={'linestyle': ''}):
        """
        Overrides default plot options for datapoint plot
        @param plot_kwargs keyword arguments for the plot function, None disables the plot call
        @return self, to allow call chaining
        """
        self.plot_kwargs = copy.copy(plot_kwargs)
        return self

    def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
        """
        Overrides default errorbar options for datapoint errorbars
        @param errorbar_kwargs keyword arguments for the errorbar function, None disables the errorbars
        @return self, to allow call chaining
        """
        self.errorbar_kwargs = copy.copy(errorbar_kwargs)
        return self

    def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
        """
        Overrides default errorband options for datapoint errorband
        @param errorband_kwargs keyword arguments for the fill_between function, None disables the error band
        @return self, to allow call chaining
        """
        self.errorband_kwargs = copy.copy(errorband_kwargs)
        return self

    def set_fill_options(self, fill_kwargs=None):
        """
        Overrides default fill_between options for the histogram fill
        @param fill_kwargs keyword arguments for the fill_between function, None disables the fill
        @return self, to allow call chaining
        """
        self.fill_kwargs = copy.copy(fill_kwargs)
        return self

    def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
        """
        Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
        @param axis axis which is drawn into
        @param x coordinates of the data points
        @param y coordinates of the data points
        @param xerr symmetric error on x data points (scalar, sequence or None)
        @param yerr symmetric error on y data points (scalar, sequence or None)
        @return tuple (artists, p, e, f): all created artists as a tuple (the first entry is a
                colour patch usable as legend handle), followed by the results of the plot,
                errorbar and fill_between calls (None for every call which was skipped)
        """
        p = e = f = None
        plot_kwargs = copy.copy(self.plot_kwargs)
        errorbar_kwargs = copy.copy(self.errorbar_kwargs)
        errorband_kwargs = copy.copy(self.errorband_kwargs)
        fill_kwargs = copy.copy(self.fill_kwargs)

        if plot_kwargs is not None and 'color' in plot_kwargs:
            color = plot_kwargs['color']
        else:
            color = next(self.prop_cycler)['color']
            # plot_kwargs may be None (plot call disabled); only store the colour in a real dict
            if plot_kwargs is not None:
                plot_kwargs['color'] = color
        color = matplotlib.colors.ColorConverter().to_rgb(color)
        patch = matplotlib.patches.Patch(color=color, alpha=0.5)
        # Give the patch the same accessor as a line, so callers can ask any handle for its colour
        patch.get_color = patch.get_facecolor
        patches = [patch]

        if plot_kwargs is not None:
            p, = axis.plot(x, y, rasterized=True, **plot_kwargs)
            patches.append(p)

        if errorbar_kwargs is not None and (xerr is not None or yerr is not None):
            if 'color' not in errorbar_kwargs:
                errorbar_kwargs['color'] = color
            if 'ecolor' not in errorbar_kwargs:
                errorbar_kwargs['ecolor'] = [0.5 * channel for channel in color]

            # fully mask nan values.
            # Needed until https://github.com/matplotlib/matplotlib/pull/23333 makes it into the externals.
            # TODO: remove in release 8.
            x_values = numpy.asarray(x)
            y_values = numpy.asarray(y)
            # Promote scalars and lists to arrays; an error which is None stays None
            if xerr is not None:
                xerr = numpy.asarray(xerr, dtype=float) * numpy.ones(len(x_values))
            if yerr is not None:
                yerr = numpy.asarray(yerr, dtype=float) * numpy.ones(len(y_values))
            mask = numpy.logical_and.reduce(
                [numpy.isfinite(v) for v in (x_values, y_values, xerr, yerr) if v is not None])

            # Negative errors are replaced by zero before they are handed to errorbar
            e = axis.errorbar(
                x_values[mask], y_values[mask],
                xerr=None if xerr is None else numpy.where(xerr[mask] < 0, 0.0, xerr[mask]),
                yerr=None if yerr is None else numpy.where(yerr[mask] < 0, 0.0, yerr[mask]),
                rasterized=True, **errorbar_kwargs)
            patches.append(e)

        if errorband_kwargs is not None and yerr is not None:
            if 'color' not in errorband_kwargs:
                errorband_kwargs['color'] = color
            if xerr is not None:
                # Ensure that xerr and yerr are iterable numpy arrays
                xerr = x + xerr - x
                yerr = y + yerr - y
                for _x, _y, _xe, _ye in zip(x, y, xerr, yerr):
                    axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye, rasterized=True,
                                                                **errorband_kwargs))
            else:
                f = axis.fill_between(x, y - yerr, y + yerr, interpolate=True, rasterized=True, **errorband_kwargs)

        if fill_kwargs is not None and len(x) > 0:
            x = numpy.asarray(x)
            # A missing or scalar x error is expanded to one half-width per point
            if xerr is None:
                xerr = numpy.zeros(len(x))
            else:
                xerr = numpy.asarray(xerr, dtype=float) * numpy.ones(len(x))
            # to fill the last bin of a histogram
            x = numpy.append(x, x[-1] + 2 * xerr[-1])
            y = numpy.append(y, y[-1])
            xerr = numpy.append(xerr, xerr[-1])

            axis.fill_between(x - xerr, y, 0, rasterized=True, **fill_kwargs)

        return (tuple(patches), p, e, f)

    def add(self, *args, **kwargs):
        """
        Add a new plot to this plotter.
        The base class draws nothing and returns NotImplemented, subclasses override this.
        """
        return NotImplemented

    def setAxisLimits(self, factor=0.0):
        """
        Sets the limits of the axis with an optional expansion factor.

        Parameters:
            factor (float): Fraction by which to expand the axis limits beyond the data range.
        """
        dx = self.xmax - self.xmin
        dy = self.ymax - self.ymin
        self.axis.set_xlim((self.xmin - factor * dx, self.xmax + factor * dx))
        self.axis.set_ylim((self.ymin - factor * dy, self.ymax + factor * dy))

    def finish(self, *args, **kwargs):
        """
        Finish plotting and set labels, legends and stuff.
        The base class does nothing and returns NotImplemented, subclasses override this.
        """
        return NotImplemented

    def scale_limits(self):
        """
        Scale limits to increase distance to boundaries
        @return self, to allow call chaining
        """
        # copysign keeps the margin pointing away from the data for negative limits as well
        self.ymin *= 1.0 - math.copysign(self.yscale, self.ymin)
        self.ymax *= 1.0 + math.copysign(self.yscale, self.ymax)
        self.xmin *= 1.0 - math.copysign(self.xscale, self.xmin)
        self.xmax *= 1.0 + math.copysign(self.xscale, self.xmax)
        return self
290
291
293 """
294 Plots the purity and the efficiency over the cut value (for cut choosing)
295 """
296
300
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
    """
    Draw two curves over the cut value: efficiency and purity, or the
    true and false positives when normed is False
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param normed boolean if True, the efficiency and purity are normalized to 1
    @return self, to allow call chaining
    """
    masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
    hists = histogram.Histograms(data, column, masks, weight_column=weight_column)

    if normed:
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        purity, purity_error = hists.get_purity(['Signal'], ['Background'])
        curve_names = ("Efficiency", "Purity")
    else:
        efficiency, efficiency_error = hists.get_true_positives(['Signal'])
        purity, purity_error = hists.get_false_positives(['Background'])
        curve_names = ("True positive", "False positive")

    # A quantity which is a plain int is expanded to an array matching the other one
    efficiency_is_int = isinstance(efficiency, int)
    purity_is_int = isinstance(purity, int)
    if efficiency_is_int and purity_is_int:
        efficiency = numpy.array([efficiency])
        purity = numpy.array([purity])
    elif efficiency_is_int:
        efficiency = numpy.array([efficiency] * len(purity))
    elif purity_is_int:
        purity = numpy.array([purity] * len(efficiency))
    cuts = hists.bin_centers

    # Grow the stored limits so that they contain the new curves
    x_low = numpy.nanmin(numpy.append(cuts, self.xmin))
    x_high = numpy.nanmax(numpy.append(cuts, self.xmax))
    y_low = numpy.nanmin(numpy.concatenate((efficiency, purity, [self.ymin])))
    y_high = numpy.nanmax(numpy.concatenate((efficiency, purity, [self.ymax])))
    self.xmin, self.xmax = x_low, x_high
    self.ymin, self.ymax = y_low, y_high

    curves = (
        (efficiency, efficiency_error, curve_names[0]),
        (purity, purity_error, curve_names[1]),
    )
    for values, errors, name in curves:
        self.set_errorbar_options({'fmt': '-o'})
        self.plots.append(self._plot_datapoints(self.axis, cuts, values, xerr=0, yerr=errors))
        self.labels.append(name)

    self.axis.set_title("Classification Plot")

    return self
358
def finish(self):
    """
    Apply the axis limits with a 1% margin, label the x-axis and draw the legend
    @return self, to allow call chaining
    """
    self.setAxisLimits(factor=0.01)
    x_axis = self.axis.get_xaxis()
    x_axis.set_label_text('Cut Value')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    self.axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
367
368
370 """
371 Plots the signal to noise ratio over the cut value (for cut choosing)
372 """
373
377
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate signal to noise ratio for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is empty or None
    @return self, to allow call chaining
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
    signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])
    cuts = hists.bin_centers

    # Drop all cuts whose signal to noise ratio is NaN or infinite
    valid = numpy.isfinite(signal2noise)
    signal2noise = signal2noise[valid]
    signal2noise_error = signal2noise_error[valid]
    cuts = cuts[valid]

    # Determine "best" cut by maximizing Signal to Noise
    if len(signal2noise) == 0 or numpy.all(numpy.isnan(signal2noise)):
        best_idx = None
    else:
        best_idx = numpy.nanargmax(signal2noise)
        best_cut = cuts[best_idx]
        best_signal2noise = signal2noise[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(cuts, self.xmin)), numpy.nanmax(numpy.append(cuts, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(signal2noise, self.ymin)), \
        numpy.nanmax(numpy.append(signal2noise, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, cuts, signal2noise, xerr=0, yerr=signal2noise_error)
    self.plots.append(p)

    # The legend shows at most the first ten characters of the name
    name = (label if label else column)[:10]
    if best_idx is not None:
        # Plot best cut point
        self.axis.plot(best_cut, best_signal2noise, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axvline(best_cut, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axhline(best_signal2noise, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (Best cut: {best_cut:.3f}, S/N: {best_signal2noise:.2f})"
    else:
        # Without a single finite value there is no best cut; the label must not use the unset variables
        cut_label = f"{name} (Best cut: n/a)"
    self.labels.append(cut_label)
    return self
426
def finish(self):
    """
    Apply the axis limits with a 5% margin, set title and x-axis label and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.setAxisLimits(factor=0.05)
    axis.set_title("Signal to Noise Plot")
    axis.get_xaxis().set_label_text('Cut Value')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
436
437
439 """
440 Plots the purity over the efficiency also known as ROC curve
441 """
442
446
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is empty or None
    @return self, to allow call chaining
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
    efficiency, efficiency_error = hists.get_efficiency(['Signal'])
    purity, purity_error = hists.get_purity(['Signal'], ['Background'])
    # A quantity which is a plain int is expanded to an array matching the other one
    if isinstance(efficiency, int) and not isinstance(purity, int):
        efficiency = numpy.array([efficiency] * len(purity))
    elif isinstance(purity, int) and not isinstance(efficiency, int):
        purity = numpy.array([purity] * len(efficiency))
    elif isinstance(purity, int) and isinstance(efficiency, int):
        efficiency = numpy.array([efficiency])
        purity = numpy.array([purity])
    cuts = hists.bin_centers

    # Keep only the cuts for which both quantities are finite
    valid = numpy.isfinite(purity) & numpy.isfinite(efficiency)
    efficiency = efficiency[valid]
    purity = purity[valid]
    cuts = cuts[valid]
    if not isinstance(efficiency_error, int):
        efficiency_error = efficiency_error[valid]
    if not isinstance(purity_error, int):
        purity_error = purity_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - purity) + numpy.square(1 - efficiency))
    if len(distance) == 0 or numpy.all(numpy.isnan(distance)):
        best_idx = None
    else:
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_efficiency = efficiency[best_idx]
        best_purity = purity[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(efficiency, self.xmin)), numpy.nanmax(numpy.append(efficiency, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(purity, self.ymin)), numpy.nanmax(numpy.append(purity, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
    self.plots.append(p)

    # The legend shows at most the first ten characters of the name
    name = (label if label else column)[:10]
    if best_idx is not None:
        # Plot best cut point
        self.axis.plot(best_efficiency, best_purity, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_purity, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_efficiency, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (Best cut: {best_cut:.3f})"
    else:
        # Without a single finite point there is no best cut; the label must not use the unset variable
        cut_label = f"{name} (Best cut: n/a)"
    self.labels.append(cut_label)
    return self
505
def finish(self):
    """
    Apply the axis limits with a 1% margin, set title and axis labels and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.setAxisLimits(factor=0.01)
    axis.set_title("ROC Purity Plot")
    axis.get_xaxis().set_label_text('Efficiency')
    axis.get_yaxis().set_label_text('Purity')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
516
517
519 """
520 Plots the rejection over the efficiency also known as ROC curve
521 """
522
526
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and rejection for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is empty or None
    @return self, to allow call chaining
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
    efficiency, efficiency_error = hists.get_efficiency(['Signal'])
    rejection, rejection_error = hists.get_efficiency(['Background'])
    # The rejection is the complement of the background efficiency
    rejection = 1 - rejection
    # A quantity which is a plain int is expanded to an array matching the other one
    if isinstance(efficiency, int) and not isinstance(rejection, int):
        efficiency = numpy.array([efficiency] * len(rejection))
    elif isinstance(rejection, int) and not isinstance(efficiency, int):
        rejection = numpy.array([rejection] * len(efficiency))
    elif isinstance(rejection, int) and isinstance(efficiency, int):
        efficiency = numpy.array([efficiency])
        rejection = numpy.array([rejection])
    cuts = hists.bin_centers

    # Keep only the cuts for which both quantities are finite
    valid = numpy.isfinite(rejection) & numpy.isfinite(efficiency)
    efficiency = efficiency[valid]
    rejection = rejection[valid]
    cuts = cuts[valid]
    if not isinstance(efficiency_error, int):
        efficiency_error = efficiency_error[valid]
    if not isinstance(rejection_error, int):
        rejection_error = rejection_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - rejection) + numpy.square(1 - efficiency))
    if len(distance) == 0 or numpy.all(numpy.isnan(distance)):
        best_idx = None
    else:
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_rejection = rejection[best_idx]
        best_efficiency = efficiency[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(efficiency, self.xmin)), numpy.nanmax(numpy.append(efficiency, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(rejection, self.ymin)), numpy.nanmax(numpy.append(rejection, self.ymax))

    # Area under the curve; abs() makes it independent of the direction in which the efficiency runs
    auc = numpy.abs(numpy.trapz(rejection, efficiency))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
    self.plots.append(p)

    # The legend shows at most the first ten characters of the name
    name = (label if label else column)[:10]
    if best_idx is not None:
        # Plot best cut point
        self.axis.plot(best_efficiency, best_rejection, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_rejection, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_efficiency, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Best cut: {best_cut:.3f})"
    else:
        # Without a single finite point there is no best cut; the label must not use the unset variable
        cut_label = f"{name} (AUC: {auc:.2f}, Best cut: n/a)"
    self.labels.append(cut_label)
    return self
588
def finish(self):
    """
    Apply the axis limits with a 1% margin, set title and axis labels and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.setAxisLimits(factor=0.01)
    axis.set_title("ROC Rejection Plot")
    axis.get_yaxis().set_label_text('Background Rejection')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)

    axis.get_xaxis().set_label_text('Signal Efficiency')
    return self
600
601
603 """
604 Plots the true ROC curve: True Positive Rate (TPR) vs False Positive Rate (FPR),
605 and marks the cut that gives the point closest to the ideal (0,1).
606 """
607
611
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate the true and false positive rate for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is empty or None
    @return self, to allow call chaining
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)

    tpr, tpr_error = hists.get_efficiency(['Signal'])  # True Positive Rate (TPR)
    fpr, fpr_error = hists.get_efficiency(['Background'])  # False Positive Rate (FPR)
    # A quantity which is a plain int is expanded to an array matching the other one
    if isinstance(tpr, int) and not isinstance(fpr, int):
        tpr = numpy.array([tpr] * len(fpr))
    elif isinstance(fpr, int) and not isinstance(tpr, int):
        fpr = numpy.array([fpr] * len(tpr))
    elif isinstance(fpr, int) and isinstance(tpr, int):
        tpr = numpy.array([tpr])
        fpr = numpy.array([fpr])
    cuts = hists.bin_centers  # Cut values for each bin

    # Keep only the cuts for which both rates are finite
    valid = numpy.isfinite(tpr) & numpy.isfinite(fpr)
    tpr = tpr[valid]
    fpr = fpr[valid]
    cuts = cuts[valid]
    if not isinstance(tpr_error, int):
        tpr_error = tpr_error[valid]
    if not isinstance(fpr_error, int):
        fpr_error = fpr_error[valid]

    # Determine "best" cut (closest to top-left corner (0,1))
    distance = numpy.sqrt(numpy.square(fpr) + numpy.square(1 - tpr))
    if len(distance) == 0 or numpy.all(numpy.isnan(distance)):
        best_idx = None
    else:
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_tpr = tpr[best_idx]
        best_fpr = fpr[best_idx]

    # Update plot range
    self.xmin, self.xmax = numpy.nanmin(numpy.append(fpr, self.xmin)), numpy.nanmax(numpy.append(fpr, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(tpr, self.ymin)), numpy.nanmax(numpy.append(tpr, self.ymax))

    # Area under the curve; abs() makes it independent of the direction in which the rate runs
    auc = numpy.abs(numpy.trapz(tpr, fpr))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, fpr, tpr, xerr=fpr_error, yerr=tpr_error)
    self.plots.append(p)

    # The legend shows at most the first ten characters of the name
    name = (label if label else column)[:10]
    if best_idx is not None:
        # Plot best cut point
        self.axis.plot(best_fpr, best_tpr, 'x', color=p[1].get_color(), markersize=8)
        self.axis.axhline(best_tpr, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_fpr, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: {best_cut:.3f})"
    else:
        # Without a single finite point there is no best cut; the label must not use the unset variable
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: n/a)"
    self.labels.append(cut_label)
    return self
675
def finish(self):
    """
    Apply the axis limits with a 1% margin, set title and axis labels and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.setAxisLimits(factor=0.01)
    axis.set_title("True ROC Curve")
    axis.get_xaxis().set_label_text('False Positive Rate (Background Efficiency)')
    axis.get_yaxis().set_label_text('True Positive Rate (Signal Efficiency)')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
686
687
689 """
690 Plots the Precision vs Recall curve and marks the cut that gives the point closest to the ideal (1,1).
691 """
692
696
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the Precision-Recall plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is empty or None
    @return self, to allow call chaining
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)

    recall, recall_error = hists.get_efficiency(['Signal'])  # Recall = TPR
    precision, precision_error = hists.get_purity(['Signal'], ['Background'])
    # A quantity which is a plain int is expanded to an array matching the other one
    if isinstance(recall, int) and not isinstance(precision, int):
        recall = numpy.array([recall] * len(precision))
    elif isinstance(precision, int) and not isinstance(recall, int):
        precision = numpy.array([precision] * len(recall))
    elif isinstance(precision, int) and isinstance(recall, int):
        recall = numpy.array([recall])
        precision = numpy.array([precision])
    cuts = hists.bin_centers

    # Keep only the cuts for which both quantities are finite
    valid = numpy.isfinite(precision) & numpy.isfinite(recall)
    precision = precision[valid]
    recall = recall[valid]
    cuts = cuts[valid]
    if not isinstance(recall_error, int):
        recall_error = recall_error[valid]
    if not isinstance(precision_error, int):
        precision_error = precision_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - precision) + numpy.square(1 - recall))
    if len(distance) == 0 or numpy.all(numpy.isnan(distance)):
        best_idx = None
    else:
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_recall = recall[best_idx]
        best_precision = precision[best_idx]

    # Update plot range
    self.xmin, self.xmax = numpy.nanmin(numpy.append(recall, self.xmin)), numpy.nanmax(numpy.append(recall, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(precision, self.ymin)), numpy.nanmax(numpy.append(precision, self.ymax))

    # Area under the curve; abs() makes it independent of the direction in which the recall runs
    auc = numpy.abs(numpy.trapz(precision, recall))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, recall, precision, xerr=recall_error, yerr=precision_error)
    self.plots.append(p)

    # The legend shows at most the first ten characters of the name
    name = (label if label else column)[:10]
    if best_idx is not None:
        # Plot best cut point
        self.axis.plot(best_recall, best_precision, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_precision, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_recall, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: {best_cut:.3f})"
    else:
        # Without a single finite point there is no best cut; the label must not use the unset variable
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: n/a)"
    self.labels.append(cut_label)
    return self
760
def finish(self):
    """
    Apply the axis limits with a 1% margin, set title and axis labels and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.setAxisLimits(factor=0.01)
    axis.set_title("Precision-Recall Curve")
    axis.get_xaxis().set_label_text('Recall (Signal Efficiency)')
    axis.get_yaxis().set_label_text('Precision (Purity)')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
771
772
774 """
775 Plots multiple other plots into a grid 3x?
776 """
777
778 figure = None
779
780 axis = None
781
def __init__(self, cls, number_of_plots, figure=None, dpi=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param cls class of the plot
    @param number_of_plots number of plots which should be displayed, must be at least 1
    @param figure default draw figure which is used
    @param dpi dpi for the matplotlib figure, if None default is used
    """
    if number_of_plots < 1:
        raise ValueError(f"number_of_plots must be at least 1, got {number_of_plots}")

    # Layouts which differ from the generic "three columns" grid
    special_layouts = {1: (1, 1), 2: (1, 2), 4: (2, 2)}
    rows, columns = special_layouts.get(number_of_plots, (int(numpy.ceil(number_of_plots / 3)), 3))

    self.dpi = dpi
    if figure is None:
        # Every sub-plot gets the size of a default single figure (12 x 8)
        self.figure = matplotlib.figure.Figure(figsize=(12 * columns, 8 * rows), dpi=dpi)
    else:
        self.figure = figure

    gs = matplotlib.gridspec.GridSpec(rows, columns)

    # Row-major list of all grid cells; the first number_of_plots cells are used
    grid_list = list(itertools.product(range(gs.nrows), range(gs.ncols)))

    self.sub_plots = [cls(self.figure, self.figure.add_subplot(gs[row, column]))
                      for row, column in grid_list[:number_of_plots]]

    self.axis = self.sub_plots[0].axis
    # Forward dpi as well, otherwise the base class resets self.dpi to None
    super().__init__(self.figure, self.axis, dpi)
820
def add(self, i, *args, **kwargs):
    """
    Call add function of ith subplot
    @param i position of the subplot
    @param args positional arguments forwarded to the add function of the subplot
    @param kwargs keyword arguments forwarded to the add function of the subplot
    @return self, to allow call chaining like the other add functions
    """
    self.sub_plots[i].add(*args, **kwargs)
    return self
827
def finish(self):
    """
    Finish every sub-plot by calling its own finish function
    @return self, to allow call chaining
    """
    for sub_plot in self.sub_plots:
        sub_plot.finish()
    return self
835
836
838 """
839 Plots the purity in each bin over the classifier output.
840 """
841
845
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Draw the purity per bin of one classifier output into the Diagonal plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if it is None
    @return self, to allow call chaining
    """
    masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
    hists = histogram.Histograms(data, column, masks, weight_column=weight_column)
    purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])

    # Grow the stored limits so that they contain the new curve
    x_low = numpy.nanmin(numpy.append(hists.bin_centers, self.xmin))
    x_high = numpy.nanmax(numpy.append(hists.bin_centers, self.xmax))
    y_low = numpy.nanmin(numpy.append(purity, self.ymin))
    y_high = numpy.nanmax(numpy.append(purity, self.ymax))
    self.xmin, self.xmax = x_low, x_high
    self.ymin, self.ymax = y_low, y_high

    self.set_errorbar_options({'fmt': '-o'})
    # Half of the bin width is used as x error of every point
    artists = self._plot_datapoints(self.axis, hists.bin_centers, purity, xerr=hists.bin_widths / 2.0, yerr=purity_error)
    self.plots.append(artists)
    self.labels.append(column if label is None else label)
    return self
874
def finish(self):
    """
    Scale the limits, draw the reference diagonal, set title and axis labels and draw the legend
    @return self, to allow call chaining
    """
    axis = self.axis
    self.scale_limits()
    # Black reference line from (0, 0) to (1, 1)
    axis.plot((0.0, 1.0), (0.0, 1.0), color='black')
    self.setAxisLimits(factor=0.01)
    axis.set_title("Diagonal Plot")
    axis.get_xaxis().set_label_text('Classifier Output')
    axis.get_yaxis().set_label_text('Purity Per Bin')
    # The first entry of every stored plot tuple holds its artists and is used as legend handle
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
887
888
890 """
891 Plots distribution of a quantity
892 """
893
def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
             keep_first_binning=False, range_in_std=None):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed_to_all_entries true if every histogram should be divided by the sum of its entries
    @param normed_to_bin_width true if every histogram should be divided by its bin widths
    @param keep_first_binning use the binning of the first distribution for further plots
    @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
    """
    super().__init__(figure, axis)

    # Normalisation and range settings, read again when a distribution is added
    self.normed_to_all_entries = normed_to_all_entries
    self.normed_to_bin_width = normed_to_bin_width
    self.range_in_std = range_in_std

    # The y-axis starts at zero; all other limits start "empty" and only grow with added data
    self.ymin, self.ymax = float(0), float('-inf')
    self.xmin, self.xmax = float('inf'), float('-inf')

    # Binning of the first added distribution, only remembered if keep_first_binning is set
    self.keep_first_binning = keep_first_binning
    self.first_binning = None

    # Label of the x-axis, empty until it is set
    self.x_axis_label = ''
926
def add(self, data, column, mask=None, weight_column=None, label=None):
    """
    Add a new distribution to the plots
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param mask boolean numpy.array defining which events are used for the histogram, all events if None
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if None
    @return self, so calls can be chained
    """
    if mask is None:
        mask = numpy.ones(len(data)).astype('bool')

    # Either the default of 100 bins or the binning remembered from the first call
    bins = 100
    if self.keep_first_binning and self.first_binning is not None:
        bins = self.first_binning
    hists = histogram.Histograms(data, column, {'Total': mask}, weight_column=weight_column,
                                 bins=bins, equal_frequency=False, range_in_std=self.range_in_std)
    if self.keep_first_binning and self.first_binning is None:
        self.first_binning = hists.bins
    hist, hist_error = hists.get_hist('Total')

    # Summed content of the raw histogram. It is computed unconditionally because
    # both normalization steps use it to leave empty histograms untouched; it was
    # previously only defined when normed_to_all_entries was set.
    normalization = float(numpy.sum(hist))

    if self.normed_to_all_entries and normalization > 0:
        hist = hist / normalization
        hist_error = hist_error / normalization

    if self.normed_to_bin_width and normalization > 0:
        hist = hist / hists.bin_widths
        hist_error = hist_error / hists.bin_widths

    # Widen the stored limits so that they cover this histogram as well
    self.xmin = numpy.nanmin(numpy.append(hists.bin_centers, self.xmin))
    self.xmax = numpy.nanmax(numpy.append(hists.bin_centers, self.xmax))
    self.ymin = numpy.nanmin(numpy.append(hist, self.ymin))
    self.ymax = numpy.nanmax(numpy.append(hist + hist_error, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, hists.bin_centers, hist, xerr=hists.bin_widths / 2, yerr=hist_error)
    self.plots.append(p)
    self.x_axis_label = column

    # Mark the legend entry if the accumulated range is still empty
    appendix = ''
    if self.ymax <= self.ymin or self.xmax <= self.xmin:
        appendix = ' No data to plot!'

    if label is None:
        self.labels.append(column + appendix)
    else:
        self.labels.append(label + appendix)
    return self
978
def finish(self):
    """
    Sets limits, title, axis-labels and legend of the plot.
    If the accumulated range is empty, a 'No data to plot' text is drawn instead
    and neither the limits nor the y-axis label are set.
    @return self, so calls can be chained
    """
    self.axis.set_title("Distribution Plot")
    self.axis.get_xaxis().set_label_text(self.x_axis_label)

    self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)

    if self.ymax <= self.ymin or self.xmax <= self.xmin:
        self.axis.set_xlim((0., 1.))
        self.axis.set_ylim((0., 1.))
        self.axis.text(0.36, 0.5, 'No data to plot', fontsize=60, color='black')
        return self

    self.scale_limits()
    self.setAxisLimits(factor=0.01)

    # The y-axis label names the normalization that add() applied
    if self.normed_to_all_entries and self.normed_to_bin_width:
        self.axis.get_yaxis().set_label_text('# Entries per Bin / (# Entries * Bin Width)')
    elif self.normed_to_all_entries:
        self.axis.get_yaxis().set_label_text('# Entries per Bin / # Entries')
    elif self.normed_to_bin_width:
        self.axis.get_yaxis().set_label_text('# Entries per Bin / Bin Width')
    else:
        self.axis.get_yaxis().set_label_text('# Entries per Bin')

    return self
1007
1008
1010 """
1011 Create a boxplot
1012 """
1013
1015
def __init__(self, figure=None, axis=None, x_axis_label=None):
    """
    Prepare an empty box plot.
    @param figure figure to draw on, forwarded to the base-class constructor
    @param axis axis to draw on, forwarded to the base-class constructor
    @param x_axis_label label of the x-axis; if it is empty, add() uses the column name instead
    """
    super().__init__(figure=figure, axis=axis)

    # Read by add() and finish()
    self.x_axis_label = x_axis_label
1026
def add(self, data, column, mask=None, weight_column=None):
    """
    Add a new boxplot to the plots
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate boxplot quantities
    @param mask boolean numpy.array defining which events are used, all events if None
    @param weight_column column in data containing the weights for each event (only triggers a warning, see below)
    @return self, so calls can be chained
    """
    if mask is None:
        mask = numpy.ones(len(data)).astype('bool')
    selected = data[column][mask]

    if weight_column is not None:
        b2.B2WARNING("Weights are currently not used in boxplot, due to limitations in matplotlib")

    if len(selected) == 0:
        b2.B2WARNING("Ignore empty boxplot.")
        return self

    # we don't plot outliers as they cause the file size to explode if large datasets are used
    box_style = {'facecolor': 'blue', 'alpha': 0.5}
    artists = self.axis.boxplot(selected, sym='k.', whis=1.5, vert=False, patch_artist=True,
                                showmeans=True, widths=1, boxprops=box_style, showfliers=False)
    self.plots.append(artists)
    self.labels.append(column)

    # Fall back to the column name if no explicit x-axis label was configured
    if not self.x_axis_label:
        self.x_axis_label = column

    # A text annotation with mean, median, sigma, inter-quartile distance, min and max
    # used to live here as a disabled (string-quoted) snippet; it was never executed.
    return self
1067
def finish(self):
    """
    Hide the y-axis and set the x-axis label and the title of the box plot.
    @return self, so calls can be chained
    """
    axis = self.axis
    matplotlib.artist.setp(axis.get_yaxis(), visible=False)
    axis.get_xaxis().set_label_text(self.x_axis_label)
    axis.set_title("Box Plot")
    return self
1076
1077
1079 """
1080 Plots the difference between two histograms
1081 """
1082
1094
def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed normalize minuend and subtrahend before comparing them
    @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
    """
    super().__init__(figure, axis)
    self.normed = normed
    self.shift_to_zero = shift_to_zero

    # Initial y-range; add() widens it to cover difference +- error
    if self.normed:
        self.ymin = -0.01
        self.ymax = 0.01
    else:
        self.ymin = -1
        self.ymax = 1

    # finish() reads this label; initialise it so that finish() also works
    # before the first add() call, which overwrites it with the column name
    self.x_axis_label = ''
1112
def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None):
    """
    Add a new difference plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
    @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
    @param weight_column column in data containing the weights for each event
    @param label label for the legend if None, the column name is used
    @return self, so calls can be chained
    """
    hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask},
                                 weight_column=weight_column, equal_frequency=False)
    minuend, minuend_error = hists.get_hist('Minuend')
    subtrahend, subtrahend_error = hists.get_hist('Subtrahend')

    # The error is derived from the summed raw contents, so it has to be
    # computed (and scaled) before the histograms themselves are normalized
    difference_error = histogram.poisson_error(minuend + subtrahend)
    if self.normed:
        difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
        minuend = minuend / numpy.sum(minuend)
        subtrahend = subtrahend / numpy.sum(subtrahend)
    difference = minuend - subtrahend

    if self.shift_to_zero:
        difference = difference - numpy.mean(difference)

    # Widen the stored limits so that they cover this curve including its errors
    self.xmin = numpy.nanmin(numpy.append(hists.bin_centers, self.xmin))
    self.xmax = numpy.nanmax(numpy.append(hists.bin_centers, self.xmax))
    self.ymin = numpy.nanmin(numpy.append(difference - difference_error, self.ymin))
    self.ymax = numpy.nanmax(numpy.append(difference + difference_error, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
    self.plots.append(p)

    # Fall back to the column name only if no explicit label was given
    # (the two branches used to be swapped)
    if label is None:
        self.labels.append(column)
    else:
        self.labels.append(label)
    self.x_axis_label = column
    return self
1155
def finish(self, line_color='black'):
    """
    Draw the zero line, apply the limits and set title, axis labels and legend.
    @param line_color color of the horizontal line at y = 0
    @return self, so calls can be chained
    """
    # Horizontal reference line over the x-range collected by add()
    x_span = (self.xmin, self.xmax)
    self.axis.plot(x_span, (0, 0), color=line_color, linewidth=4, rasterized=True)

    self.scale_limits()
    self.setAxisLimits(factor=0.01)

    axis = self.axis
    axis.set_title("Difference Plot")
    axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
    axis.get_xaxis().set_label_text(self.x_axis_label)
    axis.get_yaxis().set_label_text('Diff.')

    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
1169
1170
1172 """
1173 Create TMVA-like overtraining control plot for a classification training
1174 """
1175
1176
1177 figure = None
1178
1179 axis = None
1180
1181 axis_d1 = None
1182
1183 axis_d2 = None
1184
def __init__(self, figure=None, dpi=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param dpi dpi for the matplotlib figure, if None default is used
    """
    self.dpi = dpi
    if figure is None:
        self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
    else:
        self.figure = figure

    # Five rows: the upper three hold the distributions, the lower two one
    # difference plot each; all three axes share the x-axis
    gs = matplotlib.gridspec.GridSpec(5, 1)
    self.axis = self.figure.add_subplot(gs[:3, :])
    self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
    self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)

    # Forward dpi as well: the base constructor assigns self.dpi from its own
    # dpi argument and would otherwise reset it to None
    super().__init__(self.figure, self.axis, dpi=dpi)
1208
def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
    """
    Add a new overtraining plot, I recommend to draw only one overtraining plot at the time,
    otherwise there are too many curves in the plot to recognize anything in the plot.
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param train_mask boolean numpy.array defining which events are training events
    @param test_mask boolean numpy.array defining which events are test events
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @return self, so calls can be chained
    """
    # --- upper panel: normalized distributions on a logarithmic y-axis ---
    distribution = Distribution(self.figure, self.axis, normed_to_all_entries=True)
    self.axis.set_yscale('log')

    # Test samples are drawn with the options configured on this plotter
    distribution.set_plot_options(self.plot_kwargs)
    distribution.set_errorbar_options(self.errorbar_kwargs)
    distribution.set_errorband_options(self.errorband_kwargs)
    for class_mask in (signal_mask, bckgrd_mask):
        distribution.add(data, column, test_mask & class_mask, weight_column)

    # Training samples are drawn as filled steps in the color of the matching test sample
    signal_color = distribution.plots[0][0][0].get_color()
    distribution.set_plot_options({'color': signal_color, 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
    distribution.set_fill_options({'color': signal_color, 'alpha': 0.5, 'step': 'post'})
    distribution.set_errorbar_options(None)
    distribution.set_errorband_options(None)
    distribution.add(data, column, train_mask & signal_mask, weight_column)

    bckgrd_color = distribution.plots[1][0][0].get_color()
    distribution.set_plot_options({'color': bckgrd_color, 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
    distribution.set_fill_options({'color': bckgrd_color, 'alpha': 0.5, 'step': 'post'})
    distribution.add(data, column, train_mask & bckgrd_mask, weight_column)

    distribution.labels = ['Test-Signal', 'Test-Background', 'Train-Signal', 'Train-Background']
    distribution.finish()

    # --- lower panels: (train - test) difference, signal first, then background ---
    panels = (
        ('signal', signal_mask, self.axis_d1, signal_color),
        ('background', bckgrd_mask, self.axis_d2, bckgrd_color),
    )
    for _, class_mask, panel_axis, color in panels:
        # Note: this changes the color stored in the plot options of this plotter
        self.plot_kwargs['color'] = color
        difference = Difference(self.figure, panel_axis, shift_to_zero=True, normed=True)
        difference.set_plot_options(self.plot_kwargs)
        difference.set_errorbar_options(self.errorbar_kwargs)
        difference.set_errorband_options(self.errorband_kwargs)
        difference.add(data, column, train_mask & class_mask, test_mask & class_mask, weight_column)
        panel_axis.set_xlim((difference.xmin, difference.xmax))
        panel_axis.set_ylim((difference.ymin, difference.ymax))
        # Emptied before finish() so that the legend of the small panel has no entries
        difference.plots = difference.labels = []
        difference.finish(line_color=color)

    # --- Kolmogorov-Smirnov test between training and test sample, per class ---
    try:
        import scipy.stats
        for class_name, class_mask, panel_axis, _ in panels:
            train_values = data[column][train_mask & class_mask]
            test_values = data[column][test_mask & class_mask]
            if len(train_values) == 0 or len(test_values) == 0:
                b2.B2WARNING(f"Cannot calculate kolmogorov smirnov test for {class_name} due to missing data")
                continue
            ks = scipy.stats.ks_2samp(train_values, test_values)
            props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
            # ks[1] is the p-value of the test
            panel_axis.text(0.1, 0.9, class_name + r' (train - test) difference $p={:.2f}$'.format(ks[1]),
                            bbox=props,
                            verticalalignment='top', horizontalalignment='left', transform=panel_axis.transAxes)
    except ImportError:
        b2.B2WARNING("Cannot calculate kolmogorov smirnov test please install scipy!")

    return self
1288
def finish(self):
    """
    Set the title of the main panel, clear the titles of the two difference panels
    and keep tick labels and axis label only on the lowest x-axis.
    @return self, so calls can be chained
    """
    self.axis.set_title("Overtraining Plot")
    for panel in (self.axis_d1, self.axis_d2):
        panel.set_title("")

    # The three panels share the x-axis, so only the lowest one shows tick labels
    for upper_panel in (self.axis, self.axis_d1):
        matplotlib.artist.setp(upper_panel.get_xticklabels(), visible=False)

    x_labels = ((self.axis, ''), (self.axis_d1, ''), (self.axis_d2, 'Classifier Output'))
    for panel, text in x_labels:
        panel.get_xaxis().set_label_text(text)
    return self
1302
1303
1305 """
1306 Plots distribution of a quantity including boxplots
1307 """
1308
1309
1310 box_axes = None
1311
def __init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None):
    """
    Prepare an empty distribution plot that gets one additional boxplot per added distribution.
    @param figure figure to draw on, forwarded to the base-class constructor
    @param axis axis to draw on, forwarded to the base-class constructor
    @param normed true if the histograms should be normed before drawing
    @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
    @param x_axis_label label handed to the boxplots created in add()
    """
    super().__init__(figure, axis)

    self.normed = normed
    self.range_in_std = range_in_std
    self.x_axis_label = x_axis_label

    # One axis per boxplot, filled by add()
    self.box_axes = []

    # The histograms themselves are drawn by a Distribution sharing figure and axis
    self.distribution = Distribution(self.figure, self.axis,
                                     normed_to_all_entries=normed, range_in_std=range_in_std)
1331
def add(self, data, column, mask=None, weight_column=None, label=None):
    """
    Add a new distribution plot, with additional information like a boxplot compared to
    the ordinary Distribution plot.
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param mask boolean numpy.array defining which events are used for the distribution histogram, all events if None
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self, so calls can be chained
    """
    # Resolve the default here: the range cut below combines the mask with `&`,
    # which does not work on None
    if mask is None:
        mask = numpy.ones(len(data)).astype('bool')

    self.distribution.add(data, column, mask, weight_column, label=label)

    # Re-layout: the distribution keeps the upper three quarters of the figure,
    # the lower quarter is split evenly between all boxplots including the new one
    n = len(self.box_axes) + 1
    gs = matplotlib.gridspec.GridSpec(4 * n, 1)
    gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i in range(n)]
    box_axis = self.add_subplot(gridspecs)

    if self.range_in_std is not None:
        mean, std = histogram.weighted_mean_and_std(data[column], None if weight_column is None else data[weight_column])
        # Everything outside mean +- range_in_std * std is considered not inside the mask
        mask = mask & (data[column] > (mean - self.range_in_std * std)) & (data[column] < (mean + self.range_in_std * std))
    box = Box(self.figure, box_axis, x_axis_label=self.x_axis_label)
    box.add(data, column, mask, weight_column)
    if len(box.plots) > 0:
        # Give the box the color of the distribution that was just drawn
        box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())
    box.finish()

    self.box_axes.append(box_axis)
    return self
1364
def finish(self):
    """
    Finish the embedded distribution plot, strip titles and x-axis decorations from
    all but the last boxplot and set the title and legend of the main axis.
    @return self, so calls can be chained
    """
    self.distribution.finish()

    hide = matplotlib.artist.setp
    hide(self.axis.get_xticklabels(), visible=False)
    self.axis.get_xaxis().set_label_text('')

    # Only the last boxplot keeps its x tick labels and x-axis label
    for inner_axis in self.box_axes[:-1]:
        hide(inner_axis.get_xticklabels(), visible=False)
        inner_axis.set_title("")
        inner_axis.get_xaxis().set_label_text('')
    self.box_axes[-1].set_title("")

    self.axis.set_title("Distribution Plot")
    handles = [entry[0] for entry in self.distribution.plots]
    self.axis.legend(handles, self.distribution.labels,
                     loc='best', fancybox=True, framealpha=0.5)
    return self
1381
1382
1384 """
1385 Plots change of a distribution of a quantity depending on the cut on a classifier
1386 """
1387
1388 figure = None
1389
1390 axis = None
1391
1392 axis_d1 = None
1393
1394 axis_d2 = None
1395
def __init__(self, figure=None, dpi=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param dpi dpi for the matplotlib figure, if None default is used
    """
    self.dpi = dpi
    if figure is None:
        self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
    else:
        self.figure = figure

    # Three full-width rows sharing the x-axis
    gs = matplotlib.gridspec.GridSpec(3, 2)
    self.axis = self.figure.add_subplot(gs[0, :])
    self.axis_d1 = self.figure.add_subplot(gs[1, :], sharex=self.axis)
    self.axis_d2 = self.figure.add_subplot(gs[2, :], sharex=self.axis)

    # Forward dpi as well: the base constructor assigns self.dpi from its own
    # dpi argument and would otherwise reset it to None
    super().__init__(self.figure, self.axis, dpi=dpi)
1419
def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
    """
    Add a new correlation plot.
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param cut_column which is used to calculate cut on the other quantity defined by column
    @param quantiles currently not read: the cuts are always taken at the 5, 10, ..., 95 percentiles of cut_column
    @param signal_mask boolean mask defining the signal events; it is combined with bckgrd_mask via `|`,
           so despite the None default both masks have to be given
    @param bckgrd_mask boolean mask defining the background events, see signal_mask
    @param weight_column column in data containing the weights for each event
    @return self, so calls can be chained
    """
    if len(data[cut_column]) == 0:
        b2.B2WARNING("Ignore empty Correlation.")
        return self

    axes = [self.axis, self.axis_d1, self.axis_d2]
    colormap = plt.get_cmap('coolwarm')

    # One panel each for all events ('.'), signal ('S') and background ('B')
    selections = [('.', signal_mask | bckgrd_mask), ('S', signal_mask), ('B', bckgrd_mask)]
    for axis, (tag, m) in zip(axes, selections):
        values = data[column][m]
        cut_values = data[cut_column][m]
        if weight_column is not None:
            weights = numpy.array(data[weight_column][m])
        else:
            weights = numpy.ones(len(values))

        # This guard has to come first: it used to be an `elif` behind the
        # "not all finite" case and could therefore never be reached
        if len(values) > 0 and numpy.all(numpy.isnan(values)):
            b2.B2WARNING("All data is NaN, cannot calculate range and ignore Correlation.")
            return self

        # The histogram range is the central 90% of the finite values
        isfinite = numpy.isfinite(values)
        if numpy.all(isfinite):
            x_range = numpy.percentile(values, [5, 95])
        else:
            x_range = numpy.percentile(values[isfinite], [5, 95])

        # Uncut distribution as black outline
        tmp, x = numpy.histogram(values, bins=100, range=x_range, density=True, weights=weights)
        bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
        axis.plot(bin_center, tmp, color='black', lw=1)

        # Fill the area between the distributions of successively tighter cuts
        for quantil in numpy.arange(5, 100, 5):
            cut = numpy.percentile(cut_values, quantil)
            sel = cut_values >= cut
            y, x = numpy.histogram(values[sel], bins=100,
                                   range=x_range, density=True, weights=weights[sel])
            bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
            axis.fill_between(bin_center, tmp, y, color=colormap(quantil / 100.0))
            tmp = y

        axis.set_ylim(bottom=0)

        flatness_score = basf2_mva_util.calculate_flatness(values, cut_values, weights)
        axis.set_title(r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(tag, flatness_score))
    return self
1469
def finish(self):
    """
    Nothing is left to do here, this method only returns the plotter itself.
    @return self, so calls can be chained
    """
    return self
1475
1476
1478 """
1479 Plots multivariate distribution using TSNE algorithm
1480 """
1481
def add(self, data, columns, *masks):
    """
    Add a new TSNE plot.
    @param data pandas.DataFrame containing all data
    @param columns which are used as input features of the embedding
    @param masks different classes to show in TSNE, one boolean mask and one scatter series per class
    @return self, so calls can be chained
    """
    try:
        import sklearn
        import sklearn.manifold
    except ImportError:
        print("Cannot create TSNE plot. Install sklearn if you want it")
        return self

    model = sklearn.manifold.TSNE(n_components=2, random_state=0)

    # Keep the DataFrame untouched under its own name; it used to be overwritten
    # by the feature array and was then indexed by column name again
    features = numpy.array([data[column] for column in columns]).T

    # TSNE offers no transform() for a fitted model, so all events are embedded
    # once and the rows of each class are selected from the result
    embedding = model.fit_transform(features)
    for mask in masks:
        points = embedding[numpy.asarray(mask, dtype=bool)]
        self.axis.scatter(points[:, 0], points[:, 1], rasterized=True)
    return self
1502
def finish(self):
    """
    Nothing is left to do here, this method only returns the plotter itself.
    @return self, so calls can be chained
    """
    return self
1508
1509
1511 """
1512 Plots importance matrix
1513 """
1514
def add(self, data, columns, variables, importance_scale='normalized'):
    """
    Draw the importance matrix as an annotated heat map with a colorbar.
    @param data pandas.DataFrame containing all data
    @param columns columns of data shown along the x-axis, one heat-map column each
    @param variables variable names (y-axis labels)
    @param importance_scale 'normalized' (default, columns sum to 100) or 'hundredzero' (per-column min=0/max=100)
    @return self, so calls can be chained
    """
    def rescale_to_percent(values):
        # Map the smallest entry to 0 and the largest to 100; constant columns become zeros
        spread = numpy.max(values) - numpy.min(values)
        if spread <= 0:
            return numpy.zeros(values.shape)
        return (values - numpy.min(values)) / spread * 100

    # One row per entry of data, one column per requested column
    raw = numpy.vstack([numpy.array(data[column]) for column in columns]).T
    use_hundredzero = importance_scale == 'hundredzero'

    if use_hundredzero:
        importance_matrix = numpy.vstack([rescale_to_percent(values) for values in raw.T]).T
        vmin, vmax, fmt = 0.0, 100.0, '.0f'
    else:
        # normalized: each column sums to 100
        col_sums = raw.sum(axis=0, keepdims=True)
        col_sums[col_sums == 0] = 1  # avoid division by zero for all-zero columns
        importance_matrix = raw / col_sums * 100
        has_cells = importance_matrix.size > 0
        vmin = numpy.min(importance_matrix) if has_cells else 0.0
        vmax = numpy.max(importance_matrix) if has_cells else 100.0
        fmt = '.2g'

    # Rows are drawn in reversed order; labels and annotations below follow that order
    im = self.axis.imshow(importance_matrix[::-1], cmap=plt.cm.RdBu, vmin=vmin, vmax=vmax,
                          aspect='equal', interpolation='nearest', origin='upper')

    num_y, num_x = importance_matrix.shape

    # Dynamic figure sizing and font (all sizes in points, 72 points per inch)
    n_chars = 5
    cell_pt = max(36, min(108, 576 / max(num_x, num_y)))
    fig_w_pt = max(864, num_x * cell_pt + 252)
    fig_h_pt = max(576, num_y * cell_pt + 144)
    self.figure.set_size_inches(fig_w_pt / 72, fig_h_pt / 72)
    font_size = max(6, int(min(14, 0.8 * cell_pt / (n_chars * 0.6))))

    # Tick positions and labels, x labels are placed at the top
    self.axis.set_xticks(numpy.arange(num_x))
    self.axis.set_yticks(numpy.arange(num_y))
    self.axis.set_xticklabels(columns, rotation=90, fontsize=font_size)
    self.axis.set_yticklabels(reversed(variables), fontsize=font_size)
    self.axis.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)

    # Value of every cell as white text with a black outline
    for y, x in numpy.ndindex(num_y, num_x):
        value = importance_matrix[-1 - y, x]  # reversed rows, matching the image
        txt = self.axis.text(x, y, f'{value:{fmt}}', ha='center', va='center',
                             fontsize=font_size, color='white')
        txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='black')])

    # Colorbar; the 'hundredzero' scale only gets the two end ticks, labelled low/high
    colorbar_options = {'ax': self.axis, 'orientation': 'vertical'}
    if use_hundredzero:
        colorbar_options['ticks'] = [0.0, 100.0]
    cb = self.figure.colorbar(im, **colorbar_options)
    if use_hundredzero:
        cb.ax.set_yticklabels(['low', 'high'])
    cb.solids.set_rasterized(True)

    # Layout tightening
    self.axis.set_xlim(-0.5, num_x - 0.5)
    self.axis.set_ylim(num_y - 0.5, -0.5)  # origin='upper' flips y

    return self
1598
def finish(self):
    """
    Nothing is left to do here, this method only returns the plotter itself.
    @return self, so calls can be chained
    """
    return self
1604
1605
1607 """
1608 Plots correlation matrix
1609 """
1610
1611 figure = None
1612
1613 signal_axis = None
1614
1615 bckgrd_axis = None
1616
def __init__(self, figure=None, dpi=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param dpi dpi for the matplotlib figure, if None default is used
    """
    self.dpi = dpi
    if figure is None:
        self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
    else:
        self.figure = figure

    # Signal and background matrix side by side (sharing the y-axis) in the
    # upper six of eight rows, the colorbar spans the last row
    gs = matplotlib.gridspec.GridSpec(8, 2)
    self.signal_axis = self.figure.add_subplot(gs[:6, 0])
    self.bckgrd_axis = self.figure.add_subplot(gs[:6, 1], sharey=self.signal_axis)
    self.colorbar_axis = self.figure.add_subplot(gs[7, :])

    # The signal axis doubles as the default axis of the base class
    self.axis = self.signal_axis

    # Forward dpi as well: the base constructor assigns self.dpi from its own
    # dpi argument and would otherwise reset it to None
    super().__init__(self.figure, self.axis, dpi=dpi)
1642
def add(self, data, columns, signal_mask, bckgrd_mask):
    """
    Draw the correlation matrices (in percent) of signal and background as annotated heat maps.
    @param data pandas.DataFrame containing all data
    @param columns which are used to calculate the correlations
    @param signal_mask boolean mask selecting the events of the signal matrix
    @param bckgrd_mask boolean mask selecting the events of the background matrix
    @return self, so calls can be chained
    """
    num_vars = len(columns)
    font_size = max(4, min(14, 200 // num_vars))  # Scale font size

    def correlation_in_percent(mask):
        # Correlation coefficients between the masked columns, scaled to percent
        return numpy.corrcoef(numpy.vstack([data[column][mask] for column in columns])) * 100

    signal_corr = correlation_in_percent(signal_mask)
    bckgrd_corr = correlation_in_percent(bckgrd_mask)

    # Signal is always handled before background
    panels = (
        (self.signal_axis, signal_corr, 'Signal'),
        (self.bckgrd_axis, bckgrd_corr, 'Background'),
    )

    # Heat maps, drawn with reversed rows and columns
    heatmaps = [
        axis.imshow(corr[::-1, ::-1], cmap=plt.cm.RdBu, vmin=-100.0, vmax=100.0,
                    origin='upper', aspect='auto', interpolation='nearest')
        for axis, corr, _ in panels
    ]

    # Ticks and labels; the labels are reversed like the matrices
    tick_positions = numpy.arange(num_vars)
    for axis, _, title in panels:
        axis.set_xlabel(title)
        axis.set_xticks(tick_positions)
        axis.set_yticks(tick_positions)
        axis.set_xticklabels(reversed(columns), rotation=90, fontsize=font_size)
        axis.set_yticklabels(reversed(columns), fontsize=font_size)
        axis.xaxis.tick_top()
        axis.invert_yaxis()

    # Value of every cell as white text with a black outline
    for y in range(num_vars):
        for x in range(num_vars):
            for axis, corr, _ in panels:
                txt = axis.text(x, y, f'{corr[-1-y, -1-x]:.0f}',
                                ha='center', va='center',
                                fontsize=font_size,
                                color='white')
                txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])

    # Colorbar, built from the signal heat map (both maps use the same color scale)
    cb = self.figure.colorbar(heatmaps[0], cax=self.colorbar_axis,
                              ticks=[-100, 0, 100], orientation='horizontal')
    cb.solids.set_rasterized(True)
    cb.ax.set_xticklabels(['negative', 'uncorrelated', 'positive'])

    return self
1714
def finish(self):
    """
    Hide the y tick labels of the background matrix; it shares the y-axis with the
    signal matrix drawn next to it.
    @return self, so calls can be chained
    """
    background_tick_labels = self.bckgrd_axis.get_yticklabels()
    matplotlib.artist.setp(background_tick_labels, visible=False)
    return self
1721
1722
if __name__ == '__main__':

    def get_data(N, columns):
        """
        Creates fake data for example plots
        @param N total number of rows requested; N // 2 rows are generated per label
        @param columns names of the columns of the returned frame, the last one receives the 0/1 label
        @return pandas.DataFrame whose rows are in random order
        """
        # Integer division is required here: `N / 2` is a float in Python 3
        # and numpy does not accept a float as an array dimension.
        n_per_label = N // 2
        n_features = len(columns) - 1
        xs = numpy.random.normal(0, size=(n_per_label, n_features))
        xb = numpy.random.normal(1, size=(n_per_label, n_features))
        ys = numpy.zeros(n_per_label)
        yb = numpy.ones(n_per_label)
        data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
        # Shuffle the rows so that both labels are spread over the whole frame
        return data.reindex(numpy.random.permutation(data.index))

    import seaborn
    # Set nice seaborn settings
    seaborn.set(font_scale=3)
    seaborn.set_style('whitegrid')

    # Standard plots
    N = 100000
    data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
    # Label the first half of the (already shuffled) rows as 'Train' and the rest as 'Test'.
    # This is done in a single positional assignment: chained `data.type.iloc[...] = ...`
    # may write to a temporary copy, and a float bound like `N / 2` is not a valid slice index.
    data['type'] = numpy.where(numpy.arange(len(data)) < N // 2, 'Train', 'Test')

    p = Box()
    p.add(data, 'FastBDT')
    p.finish()
    p.save('box_plot.png')

    # NOTE(review): the instantiation line is missing from the listing this was reviewed from;
    # presumably VerboseDistribution, judging by the output file name -- confirm.
    p = VerboseDistribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('verbose_distribution_plot.png')

    # NOTE(review): the instantiation line is missing from the listing this was reviewed from;
    # presumably PurityOverEfficiency, judging by the output file name -- confirm.
    p = PurityOverEfficiency()
    p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
    p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('roc_purity_plot.png')

    # NOTE(review): the instantiation line is missing from the listing this was reviewed from;
    # presumably RejectionOverEfficiency, judging by the output file name -- confirm.
    p = RejectionOverEfficiency()
    p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
    p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('roc_rejection_plot.png')

    p = Diagonal()
    p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
    p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('diagonal_plot.png')

    p = Distribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('distribution_plot.png')

    p = Difference()
    p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test')
    p.add(data, 'NeuroBayes', data['type'] == 'Train', data['type'] == 'Test')
    p.finish()
    p.save('difference_plot.png')

    p = Overtraining()
    p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test', data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('overtraining_plot.png')

    p = Correlation()
    p.add(data, 'FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], data['isSignal'] == 0)
    p.finish()
    p.save('correlation_plot.png')

    p = CorrelationMatrix()
    data['FastBDT2'] = data['FastBDT']**2
    data['NeuroBayes2'] = data['NeuroBayes']**2
    data['FastBDT3'] = data['FastBDT']**3
    data['NeuroBayes3'] = data['NeuroBayes']**3
    # CorrelationMatrix.add takes signal_mask and bckgrd_mask as required arguments,
    # so both masks have to be passed explicitly.
    p.add(data, ['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'],
          data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('correlation_matrix.png')
calculate_flatness(f, p, w=None)
__init__(self, figure=None, axis=None, x_axis_label=None)
Definition plotting.py:1016
x_axis_label
Label on x axis.
Definition plotting.py:1025
add(self, data, column, mask=None, weight_column=None)
Definition plotting.py:1027
signal_axis
Main axis which shows the correlation of the signal samples.
Definition plotting.py:1613
colorbar_axis
Axis which holds the colorbar.
Definition plotting.py:1637
add(self, data, columns, signal_mask, bckgrd_mask)
Definition plotting.py:1643
__init__(self, figure=None, dpi=None)
Definition plotting.py:1617
bckgrd_axis
Axis which shows the correlation of the background samples.
Definition plotting.py:1615
axis_d1
Axis which shows shape of signal.
Definition plotting.py:1392
__init__(self, figure=None, dpi=None)
Definition plotting.py:1396
axis_d2
Axis which shows shape of background.
Definition plotting.py:1394
add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None)
Definition plotting.py:1420
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:846
x_axis_label
Label on x axis.
Definition plotting.py:1153
shift_to_zero
Mean difference is shifted to zero (removes constant offset) if this is true.
Definition plotting.py:1105
__init__(self, figure=None, axis=None, normed=False, shift_to_zero=False)
Definition plotting.py:1095
add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None)
Definition plotting.py:1113
normed
Minuend and subtrahend are normed before comparing them if this is true.
Definition plotting.py:1104
finish(self, line_color='black')
Definition plotting.py:1156
str x_axis_label
x axis label
Definition plotting.py:925
keep_first_binning
Keep first binning if user wants so.
Definition plotting.py:921
normed_to_all_entries
Normalize histograms before drawing them.
Definition plotting.py:906
first_binning
first binning
Definition plotting.py:923
__init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False, keep_first_binning=False, range_in_std=None)
Definition plotting.py:895
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition plotting.py:910
add(self, data, column, mask=None, weight_column=None, label=None)
Definition plotting.py:927
normed_to_bin_width
Normalize histograms before drawing them.
Definition plotting.py:908
add(self, data, columns, variables, importance_scale='normalized')
Definition plotting.py:1515
list sub_plots
the subplots which are displayed in the grid
Definition plotting.py:815
__init__(self, cls, number_of_plots, figure=None, dpi=None)
Definition plotting.py:782
add(self, i, *args, **kwargs)
Definition plotting.py:821
axis_d1
Axis which shows the difference between training and test signal.
Definition plotting.py:1181
__init__(self, figure=None, dpi=None)
Definition plotting.py:1185
axis_d2
Axis which shows the difference between training and test background.
Definition plotting.py:1183
add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
Definition plotting.py:1209
list plots
Plots added to the axis so far.
Definition plotting.py:59
list labels
Labels of the plots added so far.
Definition plotting.py:61
fill_kwargs
Default keyword arguments for fill_between function.
Definition plotting.py:119
float yscale
create figure
Definition plotting.py:70
save(self, filename)
Definition plotting.py:141
xmin
Minimum x value.
Definition plotting.py:63
add_subplot(self, gridspecs)
Definition plotting.py:129
figure
figure which is used to draw
Definition plotting.py:73
add(self, *args, **kwargs)
Definition plotting.py:257
errorband_kwargs
Default keyword arguments for errorband function.
Definition plotting.py:117
set_fill_options(self, fill_kwargs=None)
Definition plotting.py:176
finish(self, *args, **kwargs)
Definition plotting.py:275
set_plot_options(self, plot_kwargs={ 'linestyle':''})
Definition plotting.py:152
ymax
Maximum y value.
Definition plotting.py:69
prop_cycler
Property cycler used to give plots unique colors.
Definition plotting.py:127
set_errorbar_options(self, errorbar_kwargs={ 'fmt':'.', 'elinewidth':3, 'alpha':1})
Overrides default errorbar options for datapoint errorbars.
Definition plotting.py:160
xmax
Maximum x value.
Definition plotting.py:65
errorbar_kwargs
Default keyword arguments for errorbar function.
Definition plotting.py:115
float xscale
create figure
Definition plotting.py:71
dpi
set default dpi
Definition plotting.py:86
__init__(self, figure=None, axis=None, dpi=None)
Definition plotting.py:77
_plot_datapoints(self, axis, x, y, xerr=None, yerr=None)
Definition plotting.py:184
axis
Main axis which is used to draw.
Definition plotting.py:75
setAxisLimits(self, factor=0.0)
Definition plotting.py:263
ymin
Minimum y value.
Definition plotting.py:67
set_errorband_options(self, errorband_kwargs={ 'alpha':0.5})
Definition plotting.py:168
plot_kwargs
Default keyword arguments for plot function.
Definition plotting.py:113
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:697
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
Definition plotting.py:301
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:447
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:527
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:378
add(self, data, columns, *masks)
Definition plotting.py:1482
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:612
distribution
create empty list for box axes
Definition plotting.py:1328
list box_axes
Axes for the boxplots.
Definition plotting.py:1310
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition plotting.py:1324
add(self, data, column, mask=None, weight_column=None, label=None)
Definition plotting.py:1332
__init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None)
Definition plotting.py:1312
normed
Normalize histograms before drawing them.
Definition plotting.py:1322
weighted_mean_and_std(x, w)
Definition histogram.py:31
poisson_error(n_tot)
Definition histogram.py:24
Definition plot.py:1