Belle II Software development
plotting.py
1#!/usr/bin/env python3
2
3
4
11
12import copy
13import math
14
15import pandas
16import numpy
17import itertools
18import matplotlib.pyplot as plt
19import matplotlib.artist
20import matplotlib.figure
21import matplotlib.gridspec
22import matplotlib.colors
23import matplotlib.patches
24import matplotlib.ticker
25import matplotlib.patheffects as PathEffects
26
27
28from basf2_mva_evaluation import histogram
29
30import basf2 as b2
31
32import basf2_mva_util
33import matplotlib
34
# Do not use the standard TkAgg backend, because it is NOT thread-safe.
# Otherwise you will get a "RuntimeError: main thread is not in main loop"!
37matplotlib.use("svg")
38
39# Use the Belle II style while producing the plots
40plt.style.use("belle2")
41
42
class Plotter:
    """
    Base class for all Plotters.

    Holds the matplotlib figure and axis, the axis limits collected so far and
    the keyword arguments used by _plot_datapoints to draw data points,
    errorbars, errorbands and filled areas.
    """

    # stupid workaround for doxygen refusing to document things

    # Plot handles collected by the add methods (one entry per drawn curve)
    plots = None
    # Legend labels belonging to the entries of plots
    labels = None
    # Lower x limit of the plot
    xmin = None
    # Upper x limit of the plot
    xmax = None
    # Lower y limit of the plot
    ymin = None
    # Upper y limit of the plot
    ymax = None
    # Relative margin applied to the y limits by scale_limits
    yscale = 0.0
    # Relative margin applied to the x limits by scale_limits
    xscale = 0.0
    # Figure which is used to draw
    figure = None
    # Main axis which is used to draw
    axis = None

    def __init__(self, figure=None, axis=None, dpi=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param dpi dpi for the matplotlib figure, if None default is used
        """
        b2.B2INFO("Create new figure for class " + str(type(self)))

        self.dpi = dpi
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=dpi)
        else:
            self.figure = figure

        if axis is None:
            self.axis = self.figure.add_subplot(1, 1, 1)
        else:
            self.axis = axis

        self.plots = []
        self.labels = []
        self.xmin, self.xmax = float(0), float(1)
        self.ymin, self.ymax = float(0), float(1)
        self.yscale = 0.1
        self.xscale = 0.0

        # Keyword arguments for the four drawing steps of _plot_datapoints.
        # All four attributes must exist, because _plot_datapoints reads every one of them.
        self.plot_kwargs = None
        self.errorbar_kwargs = None
        self.errorband_kwargs = None
        self.fill_kwargs = None

        # Apply the defaults defined by the setters
        self.set_plot_options()
        self.set_errorbar_options()
        self.set_errorband_options()
        self.set_fill_options()

        # Endless iterator over the style property cycle, used to pick the next color
        self.prop_cycler = itertools.cycle(plt.rcParams["axes.prop_cycle"])

    def add_subplot(self, gridspecs):
        """
        Adds a new subplot to the figure, updates all other axes
        according to the given gridspec
        @param gridspecs gridspecs for all axes including the new one
        @return the newly created axis, which shares its x axis with the main axis
        """
        for gs, ax in zip(gridspecs[:-1], self.figure.axes):
            ax.set_position(gs.get_position(self.figure))
            ax.set_subplotspec(gs)
        axis = self.figure.add_subplot(gridspecs[-1], sharex=self.axis)
        return axis

    def save(self, filename):
        """
        Save the figure into a file
        @param filename of the file
        @return self
        """
        b2.B2INFO("Save figure for class " + str(type(self)))
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        canvas = FigureCanvas(self.figure)
        canvas.print_figure(filename, dpi=self.dpi, bbox_inches='tight')
        return self

    def set_plot_options(self, plot_kwargs={'linestyle': ''}):
        """
        Overrides default plot options for datapoint plot
        @param plot_kwargs keyword arguments for the plot function, None disables the plot
        @return self
        """
        # The argument is copied, so the shared default dictionary is never modified
        self.plot_kwargs = copy.copy(plot_kwargs)
        return self

    def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
        """
        Overrides default errorbar options for datapoint errorbars
        @param errorbar_kwargs keyword arguments for the errorbar function, None disables the errorbars
        @return self
        """
        self.errorbar_kwargs = copy.copy(errorbar_kwargs)
        return self

    def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
        """
        Overrides default errorband options for datapoint errorband
        @param errorband_kwargs keyword arguments for the fill_between function, None disables the errorband
        @return self
        """
        self.errorband_kwargs = copy.copy(errorband_kwargs)
        return self

    def set_fill_options(self, fill_kwargs=None):
        """
        Overrides default fill_between options for the filled area below the datapoints
        @param fill_kwargs keyword arguments for the fill_between function, None disables the fill
        @return self
        """
        self.fill_kwargs = copy.copy(fill_kwargs)
        return self

    @staticmethod
    def _broadcast_error(err, n):
        """
        Turn an error specification into one value per data point
        @param err None, a scalar, a list or a numpy array
        @param n number of data points
        @return None if err is None, the array itself if err is a numpy array,
                otherwise a float array of length n
        """
        if err is None or isinstance(err, numpy.ndarray):
            return err
        return numpy.asarray(err, dtype=float) * numpy.ones(n)

    def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
        """
        Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
        @param axis axis the datapoints are drawn on
        @param x coordinates of the data points
        @param y coordinates of the data points
        @param xerr symmetric error on x data points (None, scalar or one value per point)
        @param yerr symmetric error on y data points (None, scalar or one value per point)
        @return tuple (legend handles, plot line, errorbar container, errorband),
                the last three entries are None if the corresponding step was skipped
        """
        p = e = f = None
        plot_kwargs = copy.copy(self.plot_kwargs)
        errorbar_kwargs = copy.copy(self.errorbar_kwargs)
        errorband_kwargs = copy.copy(self.errorband_kwargs)
        fill_kwargs = copy.copy(self.fill_kwargs)

        if plot_kwargs is not None and 'color' in plot_kwargs:
            color = plot_kwargs['color']
        else:
            color = next(self.prop_cycler)['color']
            # plot_kwargs is None if the plain plot is disabled, then there is nothing to update
            if plot_kwargs is not None:
                plot_kwargs['color'] = color
        color = matplotlib.colors.ColorConverter().to_rgb(color)
        patch = matplotlib.patches.Patch(color=color, alpha=0.5)
        patch.get_color = patch.get_facecolor
        patches = [patch]

        # Scalars and lists become arrays once, so all steps below can index the errors
        xerr = self._broadcast_error(xerr, len(x))
        yerr = self._broadcast_error(yerr, len(y))

        if plot_kwargs is not None:
            p, = axis.plot(x, y, rasterized=True, **plot_kwargs)
            patches.append(p)

        if errorbar_kwargs is not None and (xerr is not None or yerr is not None):
            errorbar_kwargs.setdefault('color', color)
            errorbar_kwargs.setdefault('ecolor', [0.5 * c for c in color])

            # fully mask nan values.
            # Needed until https://github.com/matplotlib/matplotlib/pull/23333 makes it into the externals.
            # TODO: remove in release 8.
            x_values, y_values = numpy.asarray(x), numpy.asarray(y)
            given = [v for v in (x_values, y_values, xerr, yerr) if v is not None]
            mask = numpy.logical_and.reduce([numpy.isfinite(v) for v in given])

            def _masked_non_negative(err):
                # A missing error stays None; negative errors are drawn as zero
                if err is None:
                    return None
                err = err[mask]
                return numpy.where(err < 0, 0.0, err)

            e = axis.errorbar(
                x_values[mask], y_values[mask],
                xerr=_masked_non_negative(xerr), yerr=_masked_non_negative(yerr),
                rasterized=True, **errorbar_kwargs)
            patches.append(e)

        if errorband_kwargs is not None and yerr is not None:
            errorband_kwargs.setdefault('color', color)
            if xerr is not None:
                # One rectangle per data point spanning +- error in both directions
                for _x, _y, _xe, _ye in zip(x, y, xerr, yerr):
                    axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye, rasterized=True,
                                                                **errorband_kwargs))
            else:
                f = axis.fill_between(x, y - yerr, y + yerr, interpolate=True, rasterized=True, **errorband_kwargs)

        if fill_kwargs is not None and len(x) > 0:
            # Without an x error the points are treated as having zero half-width
            half_width = numpy.zeros(len(x)) if xerr is None else xerr
            # to fill the last bin of a histogram
            x = numpy.append(x, x[-1] + 2 * half_width[-1])
            y = numpy.append(y, y[-1])
            half_width = numpy.append(half_width, half_width[-1])

            axis.fill_between(x - half_width, y, 0, rasterized=True, **fill_kwargs)

        return (tuple(patches), p, e, f)

    def add(self, *args, **kwargs):
        """
        Add a new plot to this plotter
        @return NotImplemented, the base class does not draw anything
        """
        return NotImplemented

    def setAxisLimits(self, factor=0.0):
        """
        Sets the limits of the axis with an optional expansion factor.

        Parameters:
            factor (float): Fraction by which to expand the axis limits beyond the data range.
        """
        dx = self.xmax - self.xmin
        dy = self.ymax - self.ymin
        self.axis.set_xlim((self.xmin - factor * dx, self.xmax + factor * dx))
        self.axis.set_ylim((self.ymin - factor * dy, self.ymax + factor * dy))

    def finish(self, *args, **kwargs):
        """
        Finish plotting and set labels, legends and stuff
        @return NotImplemented, the base class has nothing to finish
        """
        return NotImplemented

    def scale_limits(self):
        """
        Scale limits to increase distance to boundaries
        @return self
        """
        # copysign makes sure the limits always move away from the data, independent of their sign
        self.ymin *= 1.0 - math.copysign(self.yscale, self.ymin)
        self.ymax *= 1.0 + math.copysign(self.yscale, self.ymax)
        self.xmin *= 1.0 - math.copysign(self.xscale, self.xmin)
        self.xmax *= 1.0 + math.copysign(self.xscale, self.xmax)
        return self
290
291
293 """
294 Plots the purity and the efficiency over the cut value (for cut choosing)
295 """
296
300
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
    """
    Add an efficiency curve and a purity curve over the cut value to the plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param normed boolean if True, efficiency and purity are drawn, otherwise the
           true positives and false positives
    @return self
    """
    masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
    hists = histogram.Histograms(data, column, masks, weight_column=weight_column)

    if normed:
        first_label, second_label = "Efficiency", "Purity"
        efficiency, efficiency_error = hists.get_efficiency(['Signal'])
        purity, purity_error = hists.get_purity(['Signal'], ['Background'])
    else:
        first_label, second_label = "True positive", "False positive"
        efficiency, efficiency_error = hists.get_true_positives(['Signal'])
        purity, purity_error = hists.get_false_positives(['Background'])

    # A plain int is expanded to an array, so both curves have the same length
    scalar_efficiency = isinstance(efficiency, int)
    scalar_purity = isinstance(purity, int)
    if scalar_efficiency and scalar_purity:
        efficiency = numpy.array([efficiency])
        purity = numpy.array([purity])
    elif scalar_efficiency:
        efficiency = numpy.array([efficiency] * len(purity))
    elif scalar_purity:
        purity = numpy.array([purity] * len(efficiency))
    cuts = hists.bin_centers

    # Grow the stored axis range so it contains the new curves
    self.xmin = numpy.nanmin(numpy.append(cuts, self.xmin))
    self.xmax = numpy.nanmax(numpy.append(cuts, self.xmax))
    self.ymin = numpy.nanmin(numpy.concatenate((efficiency, purity, [self.ymin])))
    self.ymax = numpy.nanmax(numpy.concatenate((efficiency, purity, [self.ymax])))

    curves = (
        (efficiency, efficiency_error, first_label),
        (purity, purity_error, second_label),
    )
    for values, errors, curve_label in curves:
        self.set_errorbar_options({'fmt': '-o'})
        self.plots.append(self._plot_datapoints(self.axis, cuts, values, xerr=0, yerr=errors))
        self.labels.append(curve_label)

    self.axis.set_title("Classification Plot")

    return self
358
def finish(self):
    """
    Apply the axis limits and add the x-axis label and the legend
    @return self
    """
    self.setAxisLimits(factor=0.01)
    axis = self.axis
    axis.get_xaxis().set_label_text('Cut Value')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
367
368
370 """
371 Plots the signal to noise ratio over the cut value (for cut choosing)
372 """
373
377
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate signal to noise ratio for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)
    signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])
    cuts = hists.bin_centers

    # Only finite values can be drawn and compared
    valid = numpy.isfinite(signal2noise)
    signal2noise = signal2noise[valid]
    signal2noise_error = signal2noise_error[valid]
    cuts = cuts[valid]

    # Determine "best" cut by maximizing Signal to Noise
    best_idx = best_cut = best_signal2noise = None
    if len(signal2noise) > 0 and not numpy.all(numpy.isnan(signal2noise)):
        best_idx = numpy.nanargmax(signal2noise)
        best_cut = cuts[best_idx]
        best_signal2noise = signal2noise[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(cuts, self.xmin)), numpy.nanmax(numpy.append(cuts, self.xmax))
    self.ymin, self.ymax = (numpy.nanmin(numpy.append(signal2noise, self.ymin)),
                            numpy.nanmax(numpy.append(signal2noise, self.ymax)))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, cuts, signal2noise, xerr=0, yerr=signal2noise_error)
    self.plots.append(p)

    # The legend entry is limited to the first 10 characters of the label (or column name)
    name = label[:10] if label else column[:10]
    if best_idx is None:
        # No finite value was left, so there is no best cut to mark or to report
        cut_label = name
    else:
        # Plot best cut point
        self.axis.plot(best_cut, best_signal2noise, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axvline(best_cut, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axhline(best_signal2noise, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (Best cut: {best_cut:.3f}, S/N: {best_signal2noise:.2f})"
    self.labels.append(cut_label)
    return self
426
def finish(self):
    """
    Apply the axis limits and add title, x-axis label and legend
    @return self
    """
    self.setAxisLimits(factor=0.05)
    axis = self.axis
    axis.set_title("Signal to Noise Plot")
    axis.get_xaxis().set_label_text('Cut Value')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
436
437
439 """
440 Plots the purity over the efficiency also known as ROC curve
441 """
442
446
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)
    efficiency, efficiency_error = hists.get_efficiency(['Signal'])
    purity, purity_error = hists.get_purity(['Signal'], ['Background'])
    # A plain int is expanded to an array, so both curves have the same length
    if isinstance(efficiency, int) and not isinstance(purity, int):
        efficiency = numpy.array([efficiency] * len(purity))
    elif isinstance(purity, int) and not isinstance(efficiency, int):
        purity = numpy.array([purity] * len(efficiency))
    elif isinstance(purity, int) and isinstance(efficiency, int):
        efficiency = numpy.array([efficiency])
        purity = numpy.array([purity])
    cuts = hists.bin_centers

    # Only points where both quantities are finite are kept
    valid = numpy.isfinite(purity) & numpy.isfinite(efficiency)
    efficiency = efficiency[valid]
    purity = purity[valid]
    cuts = cuts[valid]
    if not isinstance(efficiency_error, int):
        efficiency_error = efficiency_error[valid]
    if not isinstance(purity_error, int):
        purity_error = purity_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - purity) + numpy.square(1 - efficiency))
    best_idx = best_cut = best_efficiency = best_purity = None
    if len(distance) > 0 and not numpy.all(numpy.isnan(distance)):
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_efficiency = efficiency[best_idx]
        best_purity = purity[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(efficiency, self.xmin)), numpy.nanmax(numpy.append(efficiency, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(purity, self.ymin)), numpy.nanmax(numpy.append(purity, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
    self.plots.append(p)

    # The legend entry is limited to the first 10 characters of the label (or column name)
    name = label[:10] if label else column[:10]
    if best_idx is None:
        # No valid point was left, so there is no best cut to mark or to report
        cut_label = name
    else:
        # Plot best cut point
        self.axis.plot(best_efficiency, best_purity, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_purity, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_efficiency, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (Best cut: {best_cut:.3f})"
    self.labels.append(cut_label)
    return self
505
def finish(self):
    """
    Apply the axis limits and add title, axis labels and legend
    @return self
    """
    self.setAxisLimits(factor=0.01)
    axis = self.axis
    axis.set_title("ROC Purity Plot")
    axis.get_xaxis().set_label_text('Efficiency')
    axis.get_yaxis().set_label_text('Purity')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
516
517
519 """
520 Plots the rejection over the efficiency also known as ROC curve
521 """
522
526
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate efficiency and rejection for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)
    efficiency, efficiency_error = hists.get_efficiency(['Signal'])
    rejection, rejection_error = hists.get_efficiency(['Background'])
    # Rejection is the complement of the background efficiency
    rejection = 1 - rejection
    # A plain int is expanded to an array, so both curves have the same length
    if isinstance(efficiency, int) and not isinstance(rejection, int):
        efficiency = numpy.array([efficiency] * len(rejection))
    elif isinstance(rejection, int) and not isinstance(efficiency, int):
        rejection = numpy.array([rejection] * len(efficiency))
    elif isinstance(rejection, int) and isinstance(efficiency, int):
        efficiency = numpy.array([efficiency])
        rejection = numpy.array([rejection])
    cuts = hists.bin_centers

    # Only points where both quantities are finite are kept
    valid = numpy.isfinite(rejection) & numpy.isfinite(efficiency)
    efficiency = efficiency[valid]
    rejection = rejection[valid]
    cuts = cuts[valid]
    if not isinstance(efficiency_error, int):
        efficiency_error = efficiency_error[valid]
    if not isinstance(rejection_error, int):
        rejection_error = rejection_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - rejection) + numpy.square(1 - efficiency))
    best_idx = best_cut = best_rejection = best_efficiency = None
    if len(distance) > 0 and not numpy.all(numpy.isnan(distance)):
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_rejection = rejection[best_idx]
        best_efficiency = efficiency[best_idx]

    self.xmin, self.xmax = numpy.nanmin(numpy.append(efficiency, self.xmin)), numpy.nanmax(numpy.append(efficiency, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(rejection, self.ymin)), numpy.nanmax(numpy.append(rejection, self.ymax))

    # numpy.trapz is deprecated since NumPy 2.0 in favour of numpy.trapezoid
    trapezoid = getattr(numpy, "trapezoid", None) or numpy.trapz
    auc = numpy.abs(trapezoid(rejection, efficiency))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
    self.plots.append(p)

    # The legend entry is limited to the first 10 characters of the label (or column name)
    name = label[:10] if label else column[:10]
    if best_idx is None:
        # No valid point was left, so there is no best cut to mark or to report
        cut_label = f"{name} (AUC: {auc:.2f})"
    else:
        # Plot best cut point
        self.axis.plot(best_efficiency, best_rejection, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_rejection, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_efficiency, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Best cut: {best_cut:.3f})"
    self.labels.append(cut_label)
    return self
588
def finish(self):
    """
    Apply the axis limits and add title, axis labels and legend
    @return self
    """
    self.setAxisLimits(factor=0.01)
    axis = self.axis
    axis.set_title("ROC Rejection Plot")
    axis.get_yaxis().set_label_text('Background Rejection')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)

    axis.get_xaxis().set_label_text('Signal Efficiency')
    return self
600
601
603 """
604 Plots the true ROC curve: True Positive Rate (TPR) vs False Positive Rate (FPR),
605 and marks the cut that gives the point closest to the ideal (0,1).
606 """
607
611
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the ROC plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate the true and false positive rate for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)

    tpr, tpr_error = hists.get_efficiency(['Signal'])  # True Positive Rate (TPR)
    fpr, fpr_error = hists.get_efficiency(['Background'])  # False Positive Rate (FPR)
    # A plain int is expanded to an array, so both curves have the same length
    if isinstance(tpr, int) and not isinstance(fpr, int):
        tpr = numpy.array([tpr] * len(fpr))
    elif isinstance(fpr, int) and not isinstance(tpr, int):
        fpr = numpy.array([fpr] * len(tpr))
    elif isinstance(fpr, int) and isinstance(tpr, int):
        tpr = numpy.array([tpr])
        fpr = numpy.array([fpr])
    cuts = hists.bin_centers  # Cut values for each bin

    # Only points where both rates are finite are kept
    valid = numpy.isfinite(tpr) & numpy.isfinite(fpr)
    tpr = tpr[valid]
    fpr = fpr[valid]
    cuts = cuts[valid]
    if not isinstance(tpr_error, int):
        tpr_error = tpr_error[valid]
    if not isinstance(fpr_error, int):
        fpr_error = fpr_error[valid]

    # Determine "best" cut (closest to top-left corner (0,1))
    distance = numpy.sqrt(numpy.square(fpr) + numpy.square(1 - tpr))
    best_idx = best_cut = best_tpr = best_fpr = None
    if len(distance) > 0 and not numpy.all(numpy.isnan(distance)):
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_tpr = tpr[best_idx]
        best_fpr = fpr[best_idx]

    # Update plot range
    self.xmin, self.xmax = numpy.nanmin(numpy.append(fpr, self.xmin)), numpy.nanmax(numpy.append(fpr, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(tpr, self.ymin)), numpy.nanmax(numpy.append(tpr, self.ymax))

    # numpy.trapz is deprecated since NumPy 2.0 in favour of numpy.trapezoid
    trapezoid = getattr(numpy, "trapezoid", None) or numpy.trapz
    auc = numpy.abs(trapezoid(tpr, fpr))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, fpr, tpr, xerr=fpr_error, yerr=tpr_error)
    self.plots.append(p)

    # The legend entry is limited to the first 10 characters of the label (or column name)
    name = label[:10] if label else column[:10]
    if best_idx is None:
        # No valid point was left, so there is no best cut to mark or to report
        cut_label = f"{name} (AUC: {auc:.2f})"
    else:
        # Plot best cut point
        self.axis.plot(best_fpr, best_tpr, 'x', color=p[1].get_color(), markersize=8)
        self.axis.axhline(best_tpr, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_fpr, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: {best_cut:.3f})"
    self.labels.append(cut_label)
    return self
675
def finish(self):
    """
    Apply the axis limits and add title, axis labels and legend
    @return self
    """
    self.setAxisLimits(factor=0.01)
    axis = self.axis
    axis.set_title("True ROC Curve")
    axis.get_xaxis().set_label_text('False Positive Rate (Background Efficiency)')
    axis.get_yaxis().set_label_text('True Positive Rate (Signal Efficiency)')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
686
687
689 """
690 Plots the Precision vs Recall curve and marks the cut that gives the point closest to the ideal (1,1).
691 """
692
696
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add a new curve to the Precision-Recall plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate recall and precision for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend
    @return self
    """
    hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask},
                                 weight_column=weight_column)

    recall, recall_error = hists.get_efficiency(['Signal'])  # Recall = TPR
    precision, precision_error = hists.get_purity(['Signal'], ['Background'])
    # A plain int is expanded to an array, so both curves have the same length
    if isinstance(recall, int) and not isinstance(precision, int):
        recall = numpy.array([recall] * len(precision))
    elif isinstance(precision, int) and not isinstance(recall, int):
        precision = numpy.array([precision] * len(recall))
    elif isinstance(precision, int) and isinstance(recall, int):
        recall = numpy.array([recall])
        precision = numpy.array([precision])
    cuts = hists.bin_centers

    # Only points where both quantities are finite are kept
    valid = numpy.isfinite(precision) & numpy.isfinite(recall)
    precision = precision[valid]
    recall = recall[valid]
    cuts = cuts[valid]
    if not isinstance(recall_error, int):
        recall_error = recall_error[valid]
    if not isinstance(precision_error, int):
        precision_error = precision_error[valid]

    # Determine "best" cut (closest to point (1,1))
    distance = numpy.sqrt(numpy.square(1 - precision) + numpy.square(1 - recall))
    best_idx = best_cut = best_recall = best_precision = None
    if len(distance) > 0 and not numpy.all(numpy.isnan(distance)):
        best_idx = numpy.nanargmin(distance)
        best_cut = cuts[best_idx]
        best_recall = recall[best_idx]
        best_precision = precision[best_idx]

    # Update plot range
    self.xmin, self.xmax = numpy.nanmin(numpy.append(recall, self.xmin)), numpy.nanmax(numpy.append(recall, self.xmax))
    self.ymin, self.ymax = numpy.nanmin(numpy.append(precision, self.ymin)), numpy.nanmax(numpy.append(precision, self.ymax))

    # numpy.trapz is deprecated since NumPy 2.0 in favour of numpy.trapezoid
    trapezoid = getattr(numpy, "trapezoid", None) or numpy.trapz
    auc = numpy.abs(trapezoid(precision, recall))

    self.set_errorbar_options({'fmt': '-o'})
    p = self._plot_datapoints(self.axis, recall, precision, xerr=recall_error, yerr=precision_error)
    self.plots.append(p)

    # The legend entry is limited to the first 10 characters of the label (or column name)
    name = label[:10] if label else column[:10]
    if best_idx is None:
        # No valid point was left, so there is no best cut to mark or to report
        cut_label = f"{name} (AUC: {auc:.2f})"
    else:
        # Plot best cut point
        self.axis.plot(best_recall, best_precision, 'x', color=p[1].get_color(), markersize=8, label='Best cut')
        self.axis.axhline(best_precision, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        self.axis.axvline(best_recall, color=p[1].get_color(), linestyle='dashed', linewidth=1)
        # Add label with best cut info
        cut_label = f"{name} (AUC: {auc:.2f}, Cut: {best_cut:.3f})"
    self.labels.append(cut_label)
    return self
760
def finish(self):
    """
    Apply the axis limits and add title, axis labels and legend
    @return self
    """
    self.setAxisLimits(factor=0.01)
    axis = self.axis
    axis.set_title("Precision-Recall Curve")
    axis.get_xaxis().set_label_text('Recall (Signal Efficiency)')
    axis.get_yaxis().set_label_text('Precision (Purity)')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
771
772
774 """
775 Plots multiple other plots into a grid 3x?
776 """
777
778 figure = None
779
780 axis = None
781
def __init__(self, cls, number_of_plots, figure=None, dpi=None):
    """
    Creates a new figure if None is given, sets the default plot parameters
    @param cls class of the plot
    @param number_of_plots number of plots which should be displayed
    @param figure default draw figure which is used
    @param dpi dpi for the matplotlib figure, if None default is used
    """
    if number_of_plots < 1:
        raise ValueError(f"number_of_plots must be at least 1, got {number_of_plots}")

    # Up to three plots go into one row, four plots into a 2x2 grid,
    # everything else into as many rows of three columns as needed
    special_layouts = {1: (1, 1), 2: (1, 2), 3: (1, 3), 4: (2, 2)}
    gsTuple = special_layouts.get(number_of_plots, (int(numpy.ceil(number_of_plots / 3)), 3))

    self.dpi = dpi
    if figure is None:
        # Every grid cell gets the size of a single standard figure (12 x 8)
        self.figure = matplotlib.figure.Figure(figsize=(12 * gsTuple[1], 8 * gsTuple[0]), dpi=dpi)
    else:
        self.figure = figure

    gs = matplotlib.gridspec.GridSpec(gsTuple[0], gsTuple[1])

    # Grid cells in row-major order, only the first number_of_plots cells are used
    cells = itertools.islice(itertools.product(range(gs.nrows), range(gs.ncols)), number_of_plots)

    self.sub_plots = [cls(self.figure, self.figure.add_subplot(gs[row, col])) for row, col in cells]

    self.axis = self.sub_plots[0].axis
    # dpi has to be forwarded, otherwise the base class resets self.dpi to None
    super().__init__(self.figure, self.axis, dpi=dpi)
820
def add(self, i, *args, **kwargs):
    """
    Call add function of ith subplot
    @param i position of the subplot
    @param args positional arguments forwarded to the add function of the subplot
    @param kwargs keyword arguments forwarded to the add function of the subplot
    @return self, so calls can be chained like with the add functions of the other plotters
    """
    self.sub_plots[i].add(*args, **kwargs)
    return self
827
def finish(self):
    """
    Finish every subplot by calling its finish function
    @return self
    """
    for sub_plot in self.sub_plots:
        sub_plot.finish()
    return self
835
836
838 """
839 Plots the purity in each bin over the classifier output.
840 """
841
845
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
    """
    Add the purity per bin of the given column as a new curve to the Diagonal plot
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate purity for different cuts
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @param label label for the plot legend, the column name is used if None
    @return self
    """
    masks = {'Signal': signal_mask, 'Background': bckgrd_mask}
    hists = histogram.Histograms(data, column, masks, weight_column=weight_column)
    purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])

    # Grow the stored axis range so it contains the new curve
    self.xmin = numpy.nanmin(numpy.append(hists.bin_centers, self.xmin))
    self.xmax = numpy.nanmax(numpy.append(hists.bin_centers, self.xmax))
    self.ymin = numpy.nanmin(numpy.append(purity, self.ymin))
    self.ymax = numpy.nanmax(numpy.append(purity, self.ymax))

    self.set_errorbar_options({'fmt': '-o'})
    # The x error of each point is half of the bin width
    half_widths = hists.bin_widths / 2.0
    curve = self._plot_datapoints(self.axis, hists.bin_centers, purity, xerr=half_widths, yerr=purity_error)
    self.plots.append(curve)
    self.labels.append(column if label is None else label)
    return self
874
def finish(self):
    """
    Scale the limits, draw the diagonal and add title, axis labels and legend
    @return self
    """
    self.scale_limits()
    axis = self.axis
    # Reference line from (0, 0) to (1, 1)
    axis.plot((0.0, 1.0), (0.0, 1.0), color='black')
    self.setAxisLimits(factor=0.01)
    axis.set_title("Diagonal Plot")
    axis.get_xaxis().set_label_text('Classifier Output')
    axis.get_yaxis().set_label_text('Purity Per Bin')
    # The first entry of every stored plot tuple holds the legend handles
    handles = [entry[0] for entry in self.plots]
    axis.legend(handles, self.labels, loc='best', fancybox=True, framealpha=0.5)
    return self
887
888
890 """
891 Plots distribution of a quantity
892 """
893
def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
             keep_first_binning=False, range_in_std=None):
    """
    Creates a new figure and axis if None is given, sets the default plot parameters
    @param figure default draw figure which is used
    @param axis default draw axis which is used
    @param normed_to_all_entries true if histograms should be divided by the sum of their entries before drawing
    @param normed_to_bin_width true if histograms should be divided by the bin widths before drawing
    @param keep_first_binning use the binning of the first distribution for further plots
    @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
    """
    super().__init__(figure, axis)

    # Normalization flags
    self.normed_to_all_entries = normed_to_all_entries
    self.normed_to_bin_width = normed_to_bin_width

    # Binning configuration, first_binning is filled once the first distribution was added
    self.range_in_std = range_in_std
    self.keep_first_binning = keep_first_binning
    self.first_binning = None

    # Replace the limits set by the base class: y starts at zero,
    # all other limits start at +-infinity
    self.ymin, self.ymax = float(0), float('-inf')
    self.xmin, self.xmax = float('inf'), float('-inf')

    # Label of the x axis, empty by default
    self.x_axis_label = ''
926
927 def add(self, data, column, mask=None, weight_column=None, label=None):
928 """
929 Add a new distribution to the plots
930 @param data pandas.DataFrame containing all data
931 @param column which is used to calculate distribution histogram
932 @param mask boolean numpy.array defining which events are used for the histogram
933 @param weight_column column in data containing the weights for each event
934 @param label label for the plot legend
935 """
936 if mask is None:
937 mask = numpy.ones(len(data)).astype('bool')
938
939 bins = 100
940 if self.keep_first_binning and self.first_binning is not None:
941 bins = self.first_binning
942 hists = histogram.Histograms(data, column, {'Total': mask}, weight_column=weight_column,
943 bins=bins, equal_frequency=False, range_in_std=self.range_in_std)
944 if self.keep_first_binning and self.first_binning is None:
945 self.first_binning = hists.bins
946 hist, hist_error = hists.get_hist('Total')
947
948 if self.normed_to_all_entries:
949 normalization = float(numpy.sum(hist))
950 hist = hist / normalization if normalization > 0 else hist
951 hist_error = hist_error / normalization if normalization > 0 else hist_error
952
953 if self.normed_to_bin_width:
954 hist = hist / hists.bin_widths if normalization > 0 else hist
955 hist_error = hist_error / hists.bin_widths if normalization > 0 else hist_error
956
957 self.xmin, self.xmax = numpy.nanmin(
958 numpy.append(
959 hists.bin_centers, self.xmin)), numpy.nanmax(
960 numpy.append(
961 hists.bin_centers, self.xmax))
962 self.ymin, self.ymax = numpy.nanmin(numpy.append(hist, self.ymin)), numpy.nanmax(numpy.append(hist + hist_error, self.ymax))
963
964 self.set_errorbar_options({'fmt': '-o'})
965 p = self._plot_datapoints(self.axis, hists.bin_centers, hist, xerr=hists.bin_widths / 2, yerr=hist_error)
966 self.plots.append(p)
967 self.x_axis_label = column
968
969 appendix = ''
970 if self.ymax <= self.ymin or self.xmax <= self.xmin:
971 appendix = ' No data to plot!'
972
973 if label is None:
974 self.labels.append(column + appendix)
975 else:
976 self.labels.append(label + appendix)
977 return self
978
979 def finish(self):
980 """
981 Sets limits, title, axis-labels and legend of the plot
982 """
983 self.axis.set_title("Distribution Plot")
984 self.axis.get_xaxis().set_label_text(self.x_axis_label)
985
986 self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
987
988 if self.ymax <= self.ymin or self.xmax <= self.xmin:
989 self.axis.set_xlim((0., 1.))
990 self.axis.set_ylim((0., 1.))
991 self.axis.text(0.36, 0.5, 'No data to plot', fontsize=60, color='black')
992 return self
993
994 self.scale_limits()
995 self.setAxisLimits(factor=0.01)
996
998 self.axis.get_yaxis().set_label_text('# Entries per Bin / (# Entries * Bin Width)')
999 elif self.normed_to_all_entries:
1000 self.axis.get_yaxis().set_label_text('# Entries per Bin / # Entries')
1001 elif self.normed_to_bin_width:
1002 self.axis.get_yaxis().set_label_text('# Entries per Bin / Bin Width')
1003 else:
1004 self.axis.get_yaxis().set_label_text('# Entries per Bin')
1005
1006 return self
1007
1008
1010 """
1011 Create a boxplot
1012 """
1013
1015
    def __init__(self, figure=None, axis=None, x_axis_label=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        @param x_axis_label label for the x axis; if it is not set, add uses the column name instead
        """
        super().__init__(figure=figure, axis=axis)

        # Label on x axis
        self.x_axis_label = x_axis_label
1026
1027 def add(self, data, column, mask=None, weight_column=None):
1028 """
1029 Add a new boxplot to the plots
1030 @param data pandas.DataFrame containing all data
1031 @param column which is used to calculate boxplot quantities
1032 @param mask boolean numpy.array defining which events are used for the histogram
1033 @param weight_column column in data containing the weights for each event
1034 """
1035 if mask is None:
1036 mask = numpy.ones(len(data)).astype('bool')
1037 x = data[column][mask]
1038 if weight_column is not None:
1039 # weight = data[weight_column][mask]
1040 b2.B2WARNING("Weights are currently not used in boxplot, due to limitations in matplotlib")
1041
1042 if len(x) == 0:
1043 b2.B2WARNING("Ignore empty boxplot.")
1044 return self
1045
1046 # we don't plot outliers as they cause the file size to explode if large datasets are used
1047 p = self.axis.boxplot(x, sym='k.', whis=1.5, vert=False, patch_artist=True, showmeans=True, widths=1,
1048 boxprops=dict(facecolor='blue', alpha=0.5), showfliers=False,
1049 # medianprobs=dict(color='blue'),
1050 # meanprobs=dict(color='red'),
1051 )
1052 self.plots.append(p)
1053 self.labels.append(column)
1054 if not self.x_axis_label:
1055 self.x_axis_label = column
1056 r"""
1057 self.axis.text(0.1, 0.9, (r'$ \mu = {:.2f}$' + '\n' + r'$median = {:.2f}$').format(x.mean(), x.median()),
1058 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)
1059 self.axis.text(0.4, 0.9, (r'$ \sigma = {:.2f}$' + '\n' + r'$IQD = {:.2f}$').format(x.std(),
1060 x.quantile(0.75) - x.quantile(0.25)),
1061 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)
1062 self.axis.text(0.7, 0.9, (r'$min = {:.2f}$' + '\n' + r'$max = {:.2f}$').format(x.min(), x.max()),
1063 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)
1064 """
1065
1066 return self
1067
1068 def finish(self):
1069 """
1070 Sets limits, title, axis-labels and legend of the plot
1071 """
1072 matplotlib.artist.setp(self.axis.get_yaxis(), visible=False)
1073 self.axis.get_xaxis().set_label_text(self.x_axis_label)
1074 self.axis.set_title("Box Plot")
1075 return self
1076
1077
1079 """
1080 Plots the difference between two histograms
1081 """
1082
1094
1095 def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
1096 """
1097 Creates a new figure and axis if None is given, sets the default plot parameters
1098 @param figure default draw figure which is used
1099 @param axis default draw axis which is used
1100 @param normed normalize minuend and subtrahend before comparing them
1101 @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
1102 """
1103 super().__init__(figure, axis)
1104 self.normed = normed
1105 self.shift_to_zero = shift_to_zero
1106 if self.normed:
1107 self.ymin = -0.01
1108 self.ymax = 0.01
1109 else:
1110 self.ymin = -1
1111 self.ymax = 1
1112
1113 def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None):
1114 """
1115 Add a new difference plot
1116 @param data pandas.DataFrame containing all data
1117 @param column which is used to calculate distribution histogram
1118 @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
1119 @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
1120 @param weight_column column in data containing the weights for each event
1121 @param label label for the legend if None, the column name is used
1122 """
1123 hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask},
1124 weight_column=weight_column, equal_frequency=False)
1125 minuend, minuend_error = hists.get_hist('Minuend')
1126 subtrahend, subtrahend_error = hists.get_hist('Subtrahend')
1127
1128 difference_error = histogram.poisson_error(minuend + subtrahend)
1129 if self.normed:
1130 difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
1131 minuend = minuend / numpy.sum(minuend)
1132 subtrahend = subtrahend / numpy.sum(subtrahend)
1133 difference = minuend - subtrahend
1134
1135 if self.shift_to_zero:
1136 difference = difference - numpy.mean(difference)
1137
1138 self.xmin, self.xmax = numpy.nanmin(
1139 numpy.append(
1140 hists.bin_centers, self.xmin)), numpy.nanmax(
1141 numpy.append(
1142 hists.bin_centers, self.xmax))
1143 self.ymin, self.ymax = numpy.nanmin(numpy.append(difference - difference_error, self.ymin)
1144 ), numpy.nanmax(numpy.append(difference + difference_error, self.ymax))
1145
1146 self.set_errorbar_options({'fmt': '-o'})
1147 p = self._plot_datapoints(self.axis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
1148 self.plots.append(p)
1149 if label is None:
1150 self.labels.append(label)
1151 else:
1152 self.labels.append(column)
1153 self.x_axis_label = column
1154 return self
1155
1156 def finish(self, line_color='black'):
1157 """
1158 Sets limits, title, axis-labels and legend of the plot
1159 """
1160 self.axis.plot((self.xmin, self.xmax), (0, 0), color=line_color, linewidth=4, rasterized=True)
1161 self.scale_limits()
1162 self.setAxisLimits(factor=0.01)
1163 self.axis.set_title("Difference Plot")
1164 self.axis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
1165 self.axis.get_xaxis().set_label_text(self.x_axis_label)
1166 self.axis.get_yaxis().set_label_text('Diff.')
1167 self.axis.legend([x[0] for x in self.plots], self.labels, loc='best', fancybox=True, framealpha=0.5)
1168 return self
1169
1170
1172 """
1173 Create TMVA-like overtraining control plot for a classification training
1174 """
1175
1176
1177 figure = None
1178
1179 axis = None
1180
1181 axis_d1 = None
1182
1183 axis_d2 = None
1184
1185 def __init__(self, figure=None, dpi=None):
1186 """
1187 Creates a new figure if None is given, sets the default plot parameters
1188 @param figure default draw figure which is used
1189 @param dpi dpi for the matplotlib figure, if None default is used
1190 """
1191
1192 self.dpi = dpi
1193 if figure is None:
1194
1195 self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
1196 else:
1197 self.figure = figure
1198
1199 gs = matplotlib.gridspec.GridSpec(5, 1)
1200
1201 self.axis = self.figure.add_subplot(gs[:3, :])
1202
1203 self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
1204
1205 self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)
1206
1207 super().__init__(self.figure, self.axis)
1208
    def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
        """
        Add a new overtraining plot, I recommend to draw only one overtraining plot at the time,
        otherwise there are too many curves in the plot to recognize anything in the plot.
        The main axis shows the normalised test and train distributions of signal and background,
        the two lower axes show the normalised (train - test) difference for signal and background.
        @param data pandas.DataFrame containing all data
        @param column which is used to calculate distribution histogram
        @param train_mask boolean numpy.array defining which events are training events
        @param test_mask boolean numpy.array defining which events are test events
        @param signal_mask boolean numpy.array defining which events are signal events
        @param bckgrd_mask boolean numpy.array defining which events are background events
        @param weight_column column in data containing the weights for each event
        @return self, to allow chaining
        """
        # Main panel: distributions normalised to their total content, on a logarithmic y axis
        distribution = Distribution(self.figure, self.axis, normed_to_all_entries=True)
        self.axis.set_yscale('log')

        # The test samples are drawn first, with the options of this plotter
        distribution.set_plot_options(self.plot_kwargs)
        distribution.set_errorbar_options(self.errorbar_kwargs)
        distribution.set_errorband_options(self.errorband_kwargs)
        distribution.add(data, column, test_mask & signal_mask, weight_column)
        distribution.add(data, column, test_mask & bckgrd_mask, weight_column)

        # The train samples are drawn as filled step histograms without error bars or bands,
        # reusing the color of the matching test sample (plots[0]: signal, plots[1]: background)
        distribution.set_plot_options(
            {'color': distribution.plots[0][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
        distribution.set_fill_options({'color': distribution.plots[0][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
        distribution.set_errorbar_options(None)
        distribution.set_errorband_options(None)
        distribution.add(data, column, train_mask & signal_mask, weight_column)
        distribution.set_plot_options(
            {'color': distribution.plots[1][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
        distribution.set_fill_options({'color': distribution.plots[1][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
        distribution.add(data, column, train_mask & bckgrd_mask, weight_column)

        # The legend labels follow the order in which the four distributions were added
        distribution.labels = ['Test-Signal', 'Test-Background', 'Train-Signal', 'Train-Background']
        distribution.finish()

        # Signal difference panel (train - test); note that this modifies self.plot_kwargs in place
        self.plot_kwargs['color'] = distribution.plots[0][0][0].get_color()
        difference_signal = Difference(self.figure, self.axis_d1, shift_to_zero=True, normed=True)
        difference_signal.set_plot_options(self.plot_kwargs)
        difference_signal.set_errorbar_options(self.errorbar_kwargs)
        difference_signal.set_errorband_options(self.errorband_kwargs)
        difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column)
        self.axis_d1.set_xlim((difference_signal.xmin, difference_signal.xmax))
        self.axis_d1.set_ylim((difference_signal.ymin, difference_signal.ymax))
        # plots and labels are emptied before finish, so the legend call there receives no entries
        difference_signal.plots = difference_signal.labels = []
        difference_signal.finish(line_color=distribution.plots[0][0][0].get_color())

        # Background difference panel (train - test), set up in the same way
        self.plot_kwargs['color'] = distribution.plots[1][0][0].get_color()
        difference_bckgrd = Difference(self.figure, self.axis_d2, shift_to_zero=True, normed=True)
        difference_bckgrd.set_plot_options(self.plot_kwargs)
        difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
        difference_bckgrd.set_errorband_options(self.errorband_kwargs)
        difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column)
        self.axis_d2.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
        self.axis_d2.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
        difference_bckgrd.plots = difference_bckgrd.labels = []
        difference_bckgrd.finish(line_color=distribution.plots[1][0][0].get_color())

        # scipy is optional: without it only the p-value annotations are skipped
        try:
            import scipy.stats
            # Kolmogorov smirnov test
            # The test compares the plain column values of train and test; weight_column is not passed to it
            if len(data[column][train_mask & signal_mask]) == 0 or len(data[column][test_mask & signal_mask]) == 0:
                b2.B2WARNING("Cannot calculate kolmogorov smirnov test for signal due to missing data")
            else:
                ks = scipy.stats.ks_2samp(data[column][train_mask & signal_mask], data[column][test_mask & signal_mask])
                props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
                # ks[1] is the second element of the ks_2samp result and is shown as the p-value
                self.axis_d1.text(0.1, 0.9, r'signal (train - test) difference $p={:.2f}$'.format(ks[1]), bbox=props,
                                  verticalalignment='top', horizontalalignment='left', transform=self.axis_d1.transAxes)
            if len(data[column][train_mask & bckgrd_mask]) == 0 or len(data[column][test_mask & bckgrd_mask]) == 0:
                b2.B2WARNING("Cannot calculate kolmogorov smirnov test for background due to missing data")
            else:
                ks = scipy.stats.ks_2samp(data[column][train_mask & bckgrd_mask], data[column][test_mask & bckgrd_mask])
                props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
                self.axis_d2.text(0.1, 0.9, r'background (train - test) difference $p={:.2f}$'.format(ks[1]),
                                  bbox=props,
                                  verticalalignment='top', horizontalalignment='left', transform=self.axis_d2.transAxes)
        except ImportError:
            b2.B2WARNING("Cannot calculate kolmogorov smirnov test please install scipy!")

        return self
1288
1289 def finish(self):
1290 """
1291 Sets limits, title, axis-labels and legend of the plot
1292 """
1293 self.axis.set_title("Overtraining Plot")
1294 self.axis_d1.set_title("")
1295 self.axis_d2.set_title("")
1296 matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
1297 matplotlib.artist.setp(self.axis_d1.get_xticklabels(), visible=False)
1298 self.axis.get_xaxis().set_label_text('')
1299 self.axis_d1.get_xaxis().set_label_text('')
1300 self.axis_d2.get_xaxis().set_label_text('Classifier Output')
1301 return self
1302
1303
1305 """
1306 Plots distribution of a quantity including boxplots
1307 """
1308
1309
1310 box_axes = None
1311
1312 def __init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None):
1313 """
1314 Creates a new figure and axis if None is given, sets the default plot parameters
1315 @param figure default draw figure which is used
1316 @param axis default draw axis which is used
1317 @param normed true if the histograms should be normed before drawing
1318 @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
1319 """
1320 super().__init__(figure, axis)
1321
1322 self.normed = normed
1323
1324 self.range_in_std = range_in_std
1325
1326 self.box_axes = []
1327
1328 self.distribution = Distribution(self.figure, self.axis, normed_to_all_entries=self.normed, range_in_std=self.range_in_std)
1329
1330 self.x_axis_label = x_axis_label
1331
1332 def add(self, data, column, mask=None, weight_column=None, label=None):
1333 """
1334 Add a new distribution plot, with additional information like a boxplot compared to
1335 the ordinary Distribution plot.
1336 @param data pandas.DataFrame containing all data
1337 @param column which is used to calculate distribution histogram
1338 @param mask boolean numpy.array defining which events are used for the distribution histogram
1339 @param weight_column column in data containing the weights for each event
1340 @param label label for the plot legend
1341 """
1345 self.distribution.add(data, column, mask, weight_column, label=label)
1346
1347 n = len(self.box_axes) + 1
1348 gs = matplotlib.gridspec.GridSpec(4 * n, 1)
1349 gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i in range(n)]
1350 box_axis = self.add_subplot(gridspecs)
1351
1352 if self.range_in_std is not None:
1353 mean, std = histogram.weighted_mean_and_std(data[column], None if weight_column is None else data[weight_column])
1354 # Everything outside mean +- range_in_std * std is considered not inside the mask
1355 mask = mask & (data[column] > (mean - self.range_in_std * std)) & (data[column] < (mean + self.range_in_std * std))
1356 box = Box(self.figure, box_axis, x_axis_label=self.x_axis_label)
1357 box.add(data, column, mask, weight_column)
1358 if len(box.plots) > 0:
1359 box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())
1360 box.finish()
1361
1362 self.box_axes.append(box_axis)
1363 return self
1364
1365 def finish(self):
1366 """
1367 Sets limits, title, axis-labels and legend of the plot
1368 """
1369 self.distribution.finish()
1370 matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
1371 self.axis.get_xaxis().set_label_text('')
1372 for box_axis in self.box_axes[:-1]:
1373 matplotlib.artist.setp(box_axis.get_xticklabels(), visible=False)
1374 box_axis.set_title("")
1375 box_axis.get_xaxis().set_label_text('')
1376 self.box_axes[-1].set_title("")
1377 self.axis.set_title("Distribution Plot")
1378 self.axis.legend([x[0] for x in self.distribution.plots], self.distribution.labels,
1379 loc='best', fancybox=True, framealpha=0.5)
1380 return self
1381
1382
1384 """
1385 Plots change of a distribution of a quantity depending on the cut on a classifier
1386 """
1387
1388 figure = None
1389
1390 axis = None
1391
1392 axis_d1 = None
1393
1394 axis_d2 = None
1395
1396 def __init__(self, figure=None, dpi=None):
1397 """
1398 Creates a new figure if None is given, sets the default plot parameters
1399 @param figure default draw figure which is used
1400 @param dpi dpi for the matplotlib figure, if None default is used
1401 """
1402
1403 self.dpi = dpi
1404 if figure is None:
1405
1406 self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
1407 else:
1408 self.figure = figure
1409
1410 gs = matplotlib.gridspec.GridSpec(3, 2)
1411
1412 self.axis = self.figure.add_subplot(gs[0, :])
1413
1414 self.axis_d1 = self.figure.add_subplot(gs[1, :], sharex=self.axis)
1415
1416 self.axis_d2 = self.figure.add_subplot(gs[2, :], sharex=self.axis)
1417
1418 super().__init__(self.figure, self.axis)
1419
1420 def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
1421 """
1422 Add a new correlation plot.
1423 @param data pandas.DataFrame containing all data
1424 @param column which is used to calculate distribution histogram
1425 @param cut_column which is used to calculate cut on the other quantity defined by column
1426 @param quantiles list of quantiles between 0 and 100, defining the different cuts
1427 @param weight_column column in data containing the weights for each event
1428 """
1429 if len(data[cut_column]) == 0:
1430 b2.B2WARNING("Ignore empty Correlation.")
1431 return self
1432
1433 axes = [self.axis, self.axis_d1, self.axis_d2]
1434
1435 for i, (l, m) in enumerate([('.', signal_mask | bckgrd_mask), ('S', signal_mask), ('B', bckgrd_mask)]):
1436 if weight_column is not None:
1437 weights = numpy.array(data[weight_column][m])
1438 else:
1439 weights = numpy.ones(len(data[column][m]))
1440
1441 xrange = numpy.percentile(data[column][m], [5, 95])
1442 isfinite = numpy.isfinite(data[column][m])
1443 if not numpy.all(isfinite):
1444 xrange = numpy.percentile(data[column][m][isfinite], [5, 95])
1445 elif numpy.all(numpy.isnan(data[column][m])):
1446 b2.B2WARNING("All data is NaN, cannot calculate range and ignore Correlation.")
1447 return self
1448
1449 colormap = plt.get_cmap('coolwarm')
1450 tmp, x = numpy.histogram(data[column][m], bins=100,
1451 range=xrange, density=True, weights=weights)
1452 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1453 axes[i].plot(bin_center, tmp, color='black', lw=1)
1454
1455 for quantil in numpy.arange(5, 100, 5):
1456 cut = numpy.percentile(data[cut_column][m], quantil)
1457 sel = data[cut_column][m] >= cut
1458 y, x = numpy.histogram(data[column][m][sel], bins=100,
1459 range=xrange, density=True, weights=weights[sel])
1460 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1461 axes[i].fill_between(bin_center, tmp, y, color=colormap(quantil / 100.0))
1462 tmp = y
1463
1464 axes[i].set_ylim(bottom=0)
1465
1466 flatness_score = basf2_mva_util.calculate_flatness(data[column][m], data[cut_column][m], weights)
1467 axes[i].set_title(r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(l, flatness_score))
1468 return self
1469
    def finish(self):
        """
        Nothing is finalised here, the titles and y limits are already set by add
        @return self, to allow chaining
        """
        return self
1475
1476
1478 """
1479 Plots multivariate distribution using TSNE algorithm
1480 """
1481
1482 def add(self, data, columns, *masks):
1483 """
1484 Add a new correlation plot.
1485 @param data pandas.DataFrame containing all data
1486 @param columns which are used to calculate the correlations
1487 @param masks different classes to show in TSNE
1488 """
1489 try:
1490 import sklearn
1491 import sklearn.manifold
1492 model = sklearn.manifold.TSNE(n_components=2, random_state=0)
1493 data = numpy.array([data[column] for column in columns]).T
1494 model.fit(data)
1495 for mask in masks:
1496 data = numpy.array([data[column][mask] for column in columns]).T
1497 data = model.transform(data)
1498 self.axis.scatter(data[:, 0], data[:, 1], rasterized=True)
1499 except ImportError:
1500 print("Cannot create TSNE plot. Install sklearn if you want it")
1501 return self
1502
    def finish(self):
        """
        Nothing is finalised here, the scatter plots are already drawn by add
        @return self, to allow chaining
        """
        return self
1508
1509
1511 """
1512 Plots importance matrix
1513 """
1514
1515 def add(self, data, columns, variables):
1516 """
1517 Add a new correlation plot.
1518 @param data pandas.DataFrame containing all data
1519 @param columns which are used to calculate the correlations
1520 """
1521
1522 def norm(x):
1523 width = (numpy.max(x) - numpy.min(x))
1524 if width <= 0:
1525 return numpy.zeros(x.shape)
1526 return (x - numpy.min(x)) / width * 100
1527
1528 importance_matrix = numpy.vstack([norm(data[column]) for column in columns]).T
1529 im = self.axis.imshow(
1530 importance_matrix[::-1], # <- reverse rows
1531 cmap=plt.cm.RdBu,
1532 vmin=0.0,
1533 vmax=100.0,
1534 aspect='equal',
1535 interpolation='nearest',
1536 origin='upper'
1537 )
1538
1539 num_y, num_x = importance_matrix.shape
1540
1541 # Adjust font size based on matrix size
1542 base_font_size = 14
1543 font_size = max(6, base_font_size * min(1.0, 25 / max(num_x, num_y)))
1544
1545 # Tick positions and labels
1546 self.axis.set_xticks(numpy.arange(num_x))
1547 self.axis.set_yticks(numpy.arange(num_y))
1548
1549 self.axis.set_xticklabels(columns, rotation=90, fontsize=font_size)
1550 self.axis.set_yticklabels(reversed(variables), fontsize=font_size)
1551
1552 self.axis.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
1553
1554 # Add text annotations
1555 for y in range(num_y):
1556 for x in range(num_x):
1557 value = importance_matrix[-1-y, x] # Reverse y-axis for correct annotation
1558 txt = self.axis.text(
1559 x, y, f'{value:.0f}',
1560 ha='center', va='center',
1561 fontsize=font_size,
1562 color='white'
1563 )
1564 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='black')])
1565
1566 # Colorbar
1567 cb = self.figure.colorbar(im, ax=self.axis, ticks=[0.0, 100.0], orientation='vertical')
1568 cb.ax.set_yticklabels(['low', 'high'])
1569 cb.solids.set_rasterized(True)
1570
1571 # Layout tightening
1572 self.axis.set_xlim(-0.5, num_x - 0.5)
1573 self.axis.set_ylim(num_y - 0.5, -0.5) # origin='upper' flips y
1574
1575 return self
1576
    def finish(self):
        """
        Nothing is finalised here, ticks, colorbar and limits are already set by add
        @return self, to allow chaining
        """
        return self
1582
1583
1585 """
1586 Plots correlation matrix
1587 """
1588
1589 figure = None
1590
1591 signal_axis = None
1592
1593 bckgrd_axis = None
1594
1595 def __init__(self, figure=None, dpi=None):
1596 """
1597 Creates a new figure if None is given, sets the default plot parameters
1598 @param figure default draw figure which is used
1599 @param dpi dpi for the matplotlib figure, if None default is used
1600 """
1601
1602 self.dpi = dpi
1603 if figure is None:
1604
1605 self.figure = matplotlib.figure.Figure(figsize=(12, 8), dpi=self.dpi)
1606 else:
1607 self.figure = figure
1608
1609 gs = matplotlib.gridspec.GridSpec(8, 2)
1610
1611 self.signal_axis = self.figure.add_subplot(gs[:6, 0])
1612
1613 self.bckgrd_axis = self.figure.add_subplot(gs[:6, 1], sharey=self.signal_axis)
1614
1615 self.colorbar_axis = self.figure.add_subplot(gs[7, :])
1616
1617 self.axis = self.signal_axis
1618
1619 super().__init__(self.figure, self.axis)
1620
1621 def add(self, data, columns, signal_mask, bckgrd_mask):
1622 """
1623 Add a new correlation plot.
1624 @param data pandas.DataFrame containing all data
1625 @param columns which are used to calculate the correlations
1626 """
1627 num_vars = len(columns)
1628 font_size = max(4, min(14, 200 // num_vars)) # Scale font size
1629
1630 signal_corr = numpy.corrcoef(numpy.vstack([data[column][signal_mask] for column in columns])) * 100
1631 bckgrd_corr = numpy.corrcoef(numpy.vstack([data[column][bckgrd_mask] for column in columns])) * 100
1632
1633 signal_heatmap = self.signal_axis.imshow(
1634 signal_corr[::-1, ::-1], # <- reverse rows and columns
1635 cmap=plt.cm.RdBu,
1636 vmin=-100.0,
1637 vmax=100.0,
1638 origin='upper',
1639 aspect='auto',
1640 interpolation='nearest')
1641 self.bckgrd_axis.imshow(
1642 bckgrd_corr[::-1, ::-1], # <- reverse rows and columns
1643 cmap=plt.cm.RdBu,
1644 vmin=-100.0,
1645 vmax=100.0,
1646 origin='upper',
1647 aspect='auto',
1648 interpolation='nearest')
1649
1650 # Tick positions
1651 tick_positions = numpy.arange(num_vars)
1652
1653 # Signal ticks
1654 self.signal_axis.set_xlabel('Signal')
1655 self.signal_axis.set_xticks(tick_positions)
1656 self.signal_axis.set_yticks(tick_positions)
1657 self.signal_axis.set_xticklabels(reversed(columns), rotation=90, fontsize=font_size)
1658 self.signal_axis.set_yticklabels(reversed(columns), fontsize=font_size)
1659 self.signal_axis.xaxis.tick_top()
1660 self.signal_axis.invert_yaxis()
1661
1662 # Background ticks
1663 self.bckgrd_axis.set_xlabel('Background')
1664 self.bckgrd_axis.set_xticks(tick_positions)
1665 self.bckgrd_axis.set_yticks(tick_positions)
1666 self.bckgrd_axis.set_xticklabels(reversed(columns), rotation=90, fontsize=font_size)
1667 self.bckgrd_axis.set_yticklabels(reversed(columns), fontsize=font_size)
1668 self.bckgrd_axis.xaxis.tick_top()
1669 self.bckgrd_axis.invert_yaxis()
1670
1671 # Add annotation text
1672 for y in range(num_vars):
1673 for x in range(num_vars):
1674 txt = self.signal_axis.text(x, y, f'{signal_corr[-1-y, -1-x]:.0f}',
1675 ha='center', va='center',
1676 fontsize=font_size,
1677 color='white')
1678 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])
1679 txt = self.bckgrd_axis.text(x, y, f'{bckgrd_corr[-1-y, -1-x]:.0f}',
1680 ha='center', va='center',
1681 fontsize=font_size,
1682 color='white')
1683 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])
1684
1685 # Colorbar
1686 cb = self.figure.colorbar(signal_heatmap, cax=self.colorbar_axis,
1687 ticks=[-100, 0, 100], orientation='horizontal')
1688 cb.solids.set_rasterized(True)
1689 cb.ax.set_xticklabels(['negative', 'uncorrelated', 'positive'])
1690
1691 return self
1692
    def finish(self):
        """
        Hides the y tick labels of the background matrix; everything else is already set by add
        @return self, to allow chaining
        """
        # the background axis is created with sharey=signal_axis, so its own y tick labels are hidden
        matplotlib.artist.setp(self.bckgrd_axis.get_yticklabels(), visible=False)
        return self
1699
1700
if __name__ == '__main__':

    def get_data(N, columns):
        """
        Creates fake data for example plots
        @param N total number of events, one half is drawn around 0 and the other half around 1
        @param columns names of the columns, the last one receives the flag (0 or 1) of the two halves
        """
        # integer division: N is used as an array dimension below, a float is rejected there
        N //= 2
        n = len(columns) - 1
        xs = numpy.random.normal(0, size=(N, n))
        xb = numpy.random.normal(1, size=(N, n))
        ys = numpy.zeros(N)
        yb = numpy.ones(N)
        data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
        # shuffle the events
        return data.reindex(numpy.random.permutation(data.index))

    import seaborn
    # Set nice seaborn settings
    seaborn.set(font_scale=3)
    seaborn.set_style('whitegrid')

    # Standard plots
    N = 100000
    data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
    data['type'] = ''
    # One positional assignment per half: slice bounds have to be integers, and the former
    # chained form (data.type.iloc[...] = ...) is not guaranteed to write into the frame
    type_position = data.columns.get_loc('type')
    data.iloc[:N // 2, type_position] = 'Train'
    data.iloc[N // 2:, type_position] = 'Test'

    is_signal = data['isSignal'] == 1
    is_bckgrd = data['isSignal'] == 0

    p = Box()
    p.add(data, 'FastBDT')
    p.finish()
    p.save('box_plot.png')

    # NOTE(review): no new plotter is created for the next three examples, so they reuse the
    # previous plotter; the instantiation lines appear to be missing here - confirm against the original file
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('verbose_distribution_plot.png')

    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_purity_plot.png')

    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('roc_rejection_plot.png')

    p = Diagonal()
    p.add(data, 'FastBDT', is_signal, is_bckgrd)
    p.add(data, 'NeuroBayes', is_signal, is_bckgrd)
    p.finish()
    p.save('diagonal_plot.png')

    p = Distribution()
    p.add(data, 'FastBDT')
    p.add(data, 'NeuroBayes')
    p.finish()
    p.save('distribution_plot.png')

    p = Difference()
    p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test')
    p.add(data, 'NeuroBayes', data['type'] == 'Train', data['type'] == 'Test')
    p.finish()
    p.save('difference_plot.png')

    p = Overtraining()
    p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test', is_signal, is_bckgrd)
    p.finish()
    p.save('overtraining_plot.png')

    p = Correlation()
    # both masks are required: Correlation.add combines them with `|`
    p.add(data, 'FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], is_signal, is_bckgrd)
    p.finish()
    p.save('correlation_plot.png')

    p = CorrelationMatrix()
    data['FastBDT2'] = data['FastBDT']**2
    data['NeuroBayes2'] = data['NeuroBayes']**2
    data['FastBDT3'] = data['FastBDT']**3
    data['NeuroBayes3'] = data['NeuroBayes']**3
    # the masks are created after the new columns, CorrelationMatrix.add needs both of them
    p.add(data, ['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'],
          data['isSignal'] == 1, data['isSignal'] == 0)
    p.finish()
    p.save('correlation_matrix.png')
calculate_flatness(f, p, w=None)
__init__(self, figure=None, axis=None, x_axis_label=None)
Definition plotting.py:1016
x_axis_label
Label on x axis.
Definition plotting.py:1025
add(self, data, column, mask=None, weight_column=None)
Definition plotting.py:1027
signal_axis
Main axis which shows the correlation of the signal samples.
Definition plotting.py:1591
colorbar_axis
add signal subplot
Definition plotting.py:1615
add(self, data, columns, signal_mask, bckgrd_mask)
Definition plotting.py:1621
__init__(self, figure=None, dpi=None)
Definition plotting.py:1595
bckgrd_axis
Axis which shows the correlation of the background samples.
Definition plotting.py:1593
axis_d1
Axis which shows shape of signal.
Definition plotting.py:1392
__init__(self, figure=None, dpi=None)
Definition plotting.py:1396
axis_d2
Axis which shows shape of background.
Definition plotting.py:1394
add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None)
Definition plotting.py:1420
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:846
x_axis_label
Label on x axis.
Definition plotting.py:1153
shift_to_zero
Mean difference is shifted to zero (removes constant offset) if this is true.
Definition plotting.py:1105
__init__(self, figure=None, axis=None, normed=False, shift_to_zero=False)
Definition plotting.py:1095
add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None)
Definition plotting.py:1113
normed
Minuend and subtrahend are normed before comparing them if this is true.
Definition plotting.py:1104
finish(self, line_color='black')
Definition plotting.py:1156
str x_axis_label
x axis label
Definition plotting.py:925
keep_first_binning
Keep first binning if user wants so.
Definition plotting.py:921
normed_to_all_entries
Normalize histograms before drawing them.
Definition plotting.py:906
first_binning
first binning
Definition plotting.py:923
__init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False, keep_first_binning=False, range_in_std=None)
Definition plotting.py:895
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition plotting.py:910
add(self, data, column, mask=None, weight_column=None, label=None)
Definition plotting.py:927
normed_to_bin_width
Normalize histograms before drawing them.
Definition plotting.py:908
add(self, data, columns, variables)
Definition plotting.py:1515
list sub_plots
the subplots which are displayed in the grid
Definition plotting.py:815
__init__(self, cls, number_of_plots, figure=None, dpi=None)
Definition plotting.py:782
add(self, i, *args, **kwargs)
Definition plotting.py:821
axis_d1
Axis which shows the difference between training and test signal.
Definition plotting.py:1181
__init__(self, figure=None, dpi=None)
Definition plotting.py:1185
axis_d2
Axis which shows the difference between training and test background.
Definition plotting.py:1183
add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
Definition plotting.py:1209
list plots
Plots added to the axis so far.
Definition plotting.py:59
list labels
Labels of the plots added so far.
Definition plotting.py:61
fill_kwargs
Default keyword arguments for fill_between function.
Definition plotting.py:119
float yscale
create figure
Definition plotting.py:70
save(self, filename)
Definition plotting.py:141
xmin
Minimum x value.
Definition plotting.py:63
add_subplot(self, gridspecs)
Definition plotting.py:129
figure
figure which is used to draw
Definition plotting.py:73
add(self, *args, **kwargs)
Definition plotting.py:257
errorband_kwargs
Default keyword arguments for errorband function.
Definition plotting.py:117
set_fill_options(self, fill_kwargs=None)
Definition plotting.py:176
finish(self, *args, **kwargs)
Definition plotting.py:275
set_plot_options(self, plot_kwargs={ 'linestyle':''})
Definition plotting.py:152
ymax
Maximum y value.
Definition plotting.py:69
prop_cycler
Property cycler used to give plots unique colors.
Definition plotting.py:127
set_errorbar_options(self, errorbar_kwargs={ 'fmt':'.', 'elinewidth':3, 'alpha':1})
Overrides default errorbar options for datapoint errorbars.
Definition plotting.py:160
xmax
Maximum x value.
Definition plotting.py:65
errorbar_kwargs
Default keyword arguments for errorbar function.
Definition plotting.py:115
float xscale
create figure
Definition plotting.py:71
dpi
set default dpi
Definition plotting.py:86
__init__(self, figure=None, axis=None, dpi=None)
Definition plotting.py:77
_plot_datapoints(self, axis, x, y, xerr=None, yerr=None)
Definition plotting.py:184
axis
Main axis which is used to draw.
Definition plotting.py:75
setAxisLimits(self, factor=0.0)
Definition plotting.py:263
ymin
Minimum y value.
Definition plotting.py:67
set_errorband_options(self, errorband_kwargs={ 'alpha':0.5})
Definition plotting.py:168
plot_kwargs
Default keyword arguments for plot function.
Definition plotting.py:113
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:697
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
Definition plotting.py:301
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:447
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:527
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:378
add(self, data, columns, *masks)
Definition plotting.py:1482
add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition plotting.py:612
distribution
create empty list for box axes
Definition plotting.py:1328
list box_axes
Axes for the boxplots.
Definition plotting.py:1310
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition plotting.py:1324
add(self, data, column, mask=None, weight_column=None, label=None)
Definition plotting.py:1332
__init__(self, figure=None, axis=None, normed=False, range_in_std=None, x_axis_label=None)
Definition plotting.py:1312
normed
Normalize histograms before drawing them.
Definition plotting.py:1322
weighted_mean_and_std(x, w)
Definition histogram.py:31
poisson_error(n_tot)
Definition histogram.py:24
Definition plot.py:1