Belle II Software light-2405-quaxo
plotting.py
1#!/usr/bin/env python3
2
3
4
11
12import copy
13import math
14
15import pandas
16import numpy
17import matplotlib.pyplot as plt
18import matplotlib.artist
19import matplotlib.figure
20import matplotlib.gridspec
21import matplotlib.colors
22import matplotlib.patches
23import matplotlib.ticker
24import matplotlib.patheffects as PathEffects
25
26
27from basf2_mva_evaluation import histogram
28
29import basf2 as b2
30
31import basf2_mva_util
32import matplotlib
33
# The default interactive backend (TkAgg) is NOT thread-safe and fails with
# "RuntimeError: main thread is not in main loop", so render with svg instead.
matplotlib.use("svg")
matplotlib.rcParams['font.size'] = 36

# Produce all plots in the Belle II style
plt.style.use("belle2")
41
42
class Plotter:
    """
    Base class for all Plotters.

    Holds the figure/axis, the bookkeeping for legend entries and axis limits,
    and the keyword-argument sets which control how data points are drawn.
    """

    # Class-level defaults (workaround so that doxygen documents the
    # attributes); __init__ overwrites them per instance.

    #: Entries returned by _plot_datapoints, used as legend handles
    plots = None
    #: Legend labels, one per entry in plots
    labels = None
    #: Lower limit of the x axis
    xmin = None
    #: Upper limit of the x axis
    xmax = None
    #: Lower limit of the y axis
    ymin = None
    #: Upper limit of the y axis
    ymax = None
    #: Relative margin applied to the y limits by scale_limits
    yscale = 0.0
    #: Relative margin applied to the x limits by scale_limits
    xscale = 0.0
    #: Figure which is drawn on
    figure = None
    #: Main axis which is drawn on
    axis = None

    def __init__(self, figure=None, axis=None):
        """
        Creates a new figure and axis if None is given, sets the default plot parameters
        @param figure default draw figure which is used
        @param axis default draw axis which is used
        """
        b2.B2INFO("Create new figure for class " + str(type(self)))
        if figure is None:
            self.figure = matplotlib.figure.Figure(figsize=(32, 18))
            self.figure.set_tight_layout(False)
        else:
            self.figure = figure

        if axis is None:
            self.axis = self.figure.add_subplot(1, 1, 1)
        else:
            self.axis = axis

        self.plots = []
        self.labels = []
        self.xmin, self.xmax = float(0), float(1)
        self.ymin, self.ymax = float(0), float(1)
        self.yscale = 0.1
        self.xscale = 0.0

        # Keyword arguments for axis.plot, axis.errorbar, the error band and
        # the histogram fill; None disables the corresponding drawing step.
        self.plot_kwargs = None
        self.errorbar_kwargs = None
        self.errorband_kwargs = None
        self.fill_kwargs = None

        # _plot_datapoints reads all four option sets, so each of them has to
        # be initialised with its default here.
        self.set_plot_options()
        self.set_errorbar_options()
        self.set_errorband_options()
        self.set_fill_options()

    def add_subplot(self, gridspecs):
        """
        Adds a new subplot to the figure, updates all other axes
        according to the given gridspec
        @param gridspecs gridspecs for all axes including the new one
        @return the newly created axis, which shares its x axis with the main axis
        """
        for gs, ax in zip(gridspecs[:-1], self.figure.axes):
            ax.set_position(gs.get_position(self.figure))
            ax.set_subplotspec(gs)
        axis = self.figure.add_subplot(gridspecs[-1], sharex=self.axis)
        return axis

    def save(self, filename):
        """
        Save the figure into a file
        @param filename of the file
        """
        b2.B2INFO("Save figure for class " + str(type(self)))
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        canvas = FigureCanvas(self.figure)
        canvas.print_figure(filename, dpi=50)
        return self

    # NOTE: the dictionary defaults below are never mutated, every setter
    # stores a copy. They cannot be replaced by None because None is a
    # meaningful argument: it switches the drawing step off.

    def set_plot_options(self, plot_kwargs={'linestyle': ''}):
        """
        Overrides default plot options for datapoint plot
        @param plot_kwargs keyword arguments for the plot function, None disables the plot
        """
        self.plot_kwargs = copy.copy(plot_kwargs)
        return self

    def set_errorbar_options(self, errorbar_kwargs={'fmt': '.', 'elinewidth': 3, 'alpha': 1}):
        """
        Overrides default errorbar options for datapoint errorbars
        @param errorbar_kwargs keyword arguments for the errorbar function, None disables the errorbars
        """
        self.errorbar_kwargs = copy.copy(errorbar_kwargs)
        return self

    def set_errorband_options(self, errorband_kwargs={'alpha': 0.5}):
        """
        Overrides default errorband options for datapoint errorband
        @param errorband_kwargs keyword arguments for the fill_between function, None disables the errorband
        """
        self.errorband_kwargs = copy.copy(errorband_kwargs)
        return self

    def set_fill_options(self, fill_kwargs=None):
        """
        Overrides default fill_between options for datapoint errorband
        @param fill_kwargs keyword arguments for the fill_between function, None disables the fill
        """
        self.fill_kwargs = copy.copy(fill_kwargs)
        return self

    @staticmethod
    def _broadcast_error(error, length):
        """
        Turn an error specification into a float array with one entry per data point
        @param error None, a scalar or a sequence of errors
        @param length number of data points
        @return None if error is None, a constant array of the given length for a scalar,
                otherwise the sequence converted to a numpy array
        """
        if error is None:
            return None
        error = numpy.asarray(error, dtype=float)
        if error.ndim == 0:
            return numpy.full(length, float(error))
        return error

    def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None):
        """
        Plot the given datapoints, with plot, errorbar and make a errorband with fill_between
        @param axis axis which is drawn on
        @param x coordinates of the data points
        @param y coordinates of the data points
        @param xerr symmetric error on x data points (None, scalar or array)
        @param yerr symmetric error on y data points (None, scalar or array)
        @return tuple (legend handles, plot result, errorbar result, fill_between result),
                the last three are None if the corresponding step was not drawn
        """
        p = e = f = None
        plot_kwargs = copy.copy(self.plot_kwargs)
        errorbar_kwargs = copy.copy(self.errorbar_kwargs)
        errorband_kwargs = copy.copy(self.errorband_kwargs)
        fill_kwargs = copy.copy(self.fill_kwargs)

        if plot_kwargs is not None and 'color' in plot_kwargs:
            color = plot_kwargs['color']
        else:
            # Take the next color of the axis color cycle
            color = next(axis._get_lines.prop_cycler)['color']
            # plot_kwargs may be None (plot disabled), then there is nothing to update
            if plot_kwargs is not None:
                plot_kwargs['color'] = color
        color = matplotlib.colors.ColorConverter().to_rgb(color)
        patch = matplotlib.patches.Patch(color=color, alpha=0.5)
        # Give the patch the same accessor as a line, so callers can use get_color on any handle
        patch.get_color = patch.get_facecolor
        patches = [patch]

        if plot_kwargs is not None:
            p, = axis.plot(x, y, rasterized=True, **plot_kwargs)
            patches.append(p)

        # One error value per data point, or None if no error was given
        x_errors = self._broadcast_error(xerr, len(x))
        y_errors = self._broadcast_error(yerr, len(x))

        if errorbar_kwargs is not None and (x_errors is not None or y_errors is not None):
            errorbar_kwargs.setdefault('color', color)
            errorbar_kwargs.setdefault('ecolor', [0.5 * channel for channel in color])

            # fully mask nan values.
            # Needed until https://github.com/matplotlib/matplotlib/pull/23333 makes it into the externals.
            # TODO: remove in release 8.
            finite = [numpy.isfinite(values) for values in (x, y, x_errors, y_errors) if values is not None]
            mask = numpy.logical_and.reduce(finite)

            e = axis.errorbar(x[mask], y[mask],
                              xerr=None if x_errors is None else x_errors[mask],
                              yerr=None if y_errors is None else y_errors[mask],
                              rasterized=True, **errorbar_kwargs)
            patches.append(e)

        if errorband_kwargs is not None and y_errors is not None:
            errorband_kwargs.setdefault('color', color)
            if x_errors is not None:
                # One rectangle per data point spanning the x and y uncertainty
                for _x, _y, _xe, _ye in zip(x, y, x_errors, y_errors):
                    axis.add_patch(matplotlib.patches.Rectangle((_x - _xe, _y - _ye), 2 * _xe, 2 * _ye, rasterized=True,
                                                                **errorband_kwargs))
            else:
                f = axis.fill_between(x, y - y_errors, y + y_errors, interpolate=True, rasterized=True, **errorband_kwargs)

        if fill_kwargs is not None:
            # Without x errors the points are treated as bins of zero width
            half_widths = numpy.zeros(len(x)) if x_errors is None else x_errors
            # to fill the last bin of a histogram
            fill_x = numpy.append(x, x[-1] + 2 * half_widths[-1])
            fill_y = numpy.append(y, y[-1])
            half_widths = numpy.append(half_widths, half_widths[-1])

            axis.fill_between(fill_x - half_widths, fill_y, 0, rasterized=True, **fill_kwargs)

        return (tuple(patches), p, e, f)

    def add(self, *args, **kwargs):
        """
        Add a new plot to this plotter
        """
        return NotImplemented

    def finish(self, *args, **kwargs):
        """
        Finish plotting and set labels, legends and stuff
        """
        return NotImplemented

    def scale_limits(self):
        """
        Scale limits to increase distance to boundaries
        """
        # copysign keeps the margin pointing away from the data for negative limits
        self.ymin *= 1.0 - math.copysign(self.yscale, self.ymin)
        self.ymax *= 1.0 + math.copysign(self.yscale, self.ymax)
        self.xmin *= 1.0 - math.copysign(self.xscale, self.xmin)
        self.xmax *= 1.0 + math.copysign(self.xscale, self.xmax)
        return self
268
269
271 """
272 Plots the purity and the efficiency over the cut value (for cut choosing)
273 """
274
278
279 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
280 """
281 Add a new curve to the plot
282 @param data pandas.DataFrame containing all data
283 @param column which is used to calculate efficiency and purity for different cuts
284 @param signal_mask boolean numpy.array defining which events are signal events
285 @param bckgrd_mask boolean numpy.array defining which events are background events
286 @param weight_column column in data containing the weights for each event
287 """
288
289 hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
290
291 if normed:
292 efficiency, efficiency_error = hists.get_efficiency(['Signal'])
293 purity, purity_error = hists.get_purity(['Signal'], ['Background'])
294 else:
295 efficiency, efficiency_error = hists.get_true_positives(['Signal'])
296 purity, purity_error = hists.get_false_positives(['Background'])
297
298 cuts = hists.bin_centers
299
300 self.xmin, self.xmaxxmaxxmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmaxxmaxxmax])
301 self.ymin, self.ymaxymaxymax = numpy.nanmin([numpy.nanmin(efficiency), numpy.nanmin(purity), self.ymin]), \
302 numpy.nanmax([numpy.nanmax(efficiency), numpy.nanmax(purity), self.ymaxymaxymax])
303
304 self.plotsplots.append(self._plot_datapoints(self.axisaxis, cuts, efficiency, xerr=0, yerr=efficiency_error))
305
306 if normed:
307 self.labelslabels.append("Efficiency")
308 else:
309 self.labelslabels.append("True positive")
310
311 self.plotsplots.append(self._plot_datapoints(self.axisaxis, cuts, purity, xerr=0, yerr=purity_error))
312
313 if normed:
314 self.labelslabels.append("Purity")
315 else:
316 self.labelslabels.append("False positive")
317
318 return self
319
320 def finish(self):
321 """
322 Sets limits, title, axis-labels and legend of the plot
323 """
324 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
325 self.axisaxis.set_ylim((self.ymin, self.ymaxymaxymax))
326 self.axisaxis.set_title("Classification Plot")
327 self.axisaxis.get_xaxis().set_label_text('Cut Value')
328 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
329 return self
330
331
333 """
334 Plots the signal to noise ratio over the cut value (for cut choosing)
335 """
336
340
341 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True):
342 """
343 Add a new curve to the plot
344 @param data pandas.DataFrame containing all data
345 @param column which is used to calculate signal to noise ratio for different cuts
346 @param signal_mask boolean numpy.array defining which events are signal events
347 @param bckgrd_mask boolean numpy.array defining which events are background events
348 @param weight_column column in data containing the weights for each event
349 """
350
351 hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
352
353 signal2noise, signal2noise_error = hists.get_signal_to_noise(['Signal'], ['Background'])
354
355 cuts = hists.bin_centers
356
357 self.xmin, self.xmaxxmaxxmax = numpy.nanmin([numpy.nanmin(cuts), self.xmin]), numpy.nanmax([numpy.nanmax(cuts), self.xmaxxmaxxmax])
358 self.ymin, self.ymaxymaxymax = numpy.nanmin([numpy.nanmin(signal2noise), self.ymin]), \
359 numpy.nanmax([numpy.nanmax(signal2noise), self.ymaxymaxymax])
360
361 self.plotsplots.append(self._plot_datapoints(self.axisaxis, cuts, signal2noise, xerr=0, yerr=signal2noise_error))
362
363 self.labelslabels.append(column)
364
365 return self
366
367 def finish(self):
368 """
369 Sets limits, title, axis-labels and legend of the plot
370 """
371 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
372 self.axisaxis.set_ylim((self.ymin, self.ymaxymaxymax))
373 self.axisaxis.set_title("Signal to Noise Plot")
374 self.axisaxis.get_xaxis().set_label_text('Cut Value')
375 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
376 return self
377
378
380 """
381 Plots the purity over the efficiency also known as ROC curve
382 """
383
387
388 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
389 """
390 Add a new curve to the ROC plot
391 @param data pandas.DataFrame containing all data
392 @param column which is used to calculate efficiency and purity for different cuts
393 @param signal_mask boolean numpy.array defining which events are signal events
394 @param bckgrd_mask boolean numpy.array defining which events are background events
395 @param weight_column column in data containing the weights for each event
396 """
397 hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
398 efficiency, efficiency_error = hists.get_efficiency(['Signal'])
399 purity, purity_error = hists.get_purity(['Signal'], ['Background'])
400
401 self.xmin, self.xmaxxmaxxmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmaxxmaxxmax])
402 self.ymin, self.ymaxymaxymax = numpy.nanmin([numpy.nanmin(purity), self.ymin]), numpy.nanmax([numpy.nanmax(purity), self.ymaxymaxymax])
403
404 p = self._plot_datapoints(self.axisaxis, efficiency, purity, xerr=efficiency_error, yerr=purity_error)
405 self.plotsplots.append(p)
406 if label is not None:
407 self.labelslabels.append(label)
408 else:
409 self.labelslabels.append(column)
410 return self
411
412 def finish(self):
413 """
414 Sets limits, title, axis-labels and legend of the plot
415 """
416 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
417 self.axisaxis.set_ylim((self.ymin, self.ymaxymaxymax))
418 self.axisaxis.set_title("ROC Purity Plot")
419 self.axisaxis.get_xaxis().set_label_text('Efficiency')
420 self.axisaxis.get_yaxis().set_label_text('Purity')
421 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
422 return self
423
424
426 """
427 Plots the rejection over the efficiency also known as ROC curve
428 """
429
433
434 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None):
435 """
436 Add a new curve to the ROC plot
437 @param data pandas.DataFrame containing all data
438 @param column which is used to calculate efficiency and purity for different cuts
439 @param signal_mask boolean numpy.array defining which events are signal events
440 @param bckgrd_mask boolean numpy.array defining which events are background events
441 @param weight_column column in data containing the weights for each event
442 """
443 hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
444 efficiency, efficiency_error = hists.get_efficiency(['Signal'])
445 rejection, rejection_error = hists.get_efficiency(['Background'])
446 rejection = 1 - rejection
447 if isinstance(efficiency, int) and not isinstance(rejection, int):
448 efficiency = numpy.array([efficiency] * len(rejection))
449 elif isinstance(rejection, int) and not isinstance(efficiency, int):
450 rejection = numpy.array([rejection] * len(efficiency))
451 elif isinstance(rejection, int) and isinstance(efficiency, int):
452 efficiency = numpy.array([efficiency])
453 rejection = numpy.array([rejection])
454
455 self.xmin, self.xmaxxmaxxmax = numpy.nanmin([efficiency.min(), self.xmin]), numpy.nanmax([efficiency.max(), self.xmaxxmaxxmax])
456 self.ymin, self.ymaxymaxymax = numpy.nanmin([rejection.min(), self.ymin]), numpy.nanmax([rejection.max(), self.ymaxymaxymax])
457
458 auc = numpy.abs(numpy.trapz(rejection, efficiency))
459
460 p = self._plot_datapoints(self.axisaxis, efficiency, rejection, xerr=efficiency_error, yerr=rejection_error)
461 self.plotsplots.append(p)
462 if label is not None:
463 self.labelslabels.append(label[:10] + f" ({auc:.2f})")
464 else:
465 self.labelslabels.append(column[:10] + f" ({auc:.2f})")
466 return self
467
468 def finish(self):
469 """
470 Sets limits, title, axis-labels and legend of the plot
471 """
472 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
473 self.axisaxis.set_ylim((self.ymin, self.ymaxymaxymax))
474 self.axisaxis.set_title("ROC Rejection Plot")
475 self.axisaxis.get_xaxis().set_label_text('Signal Efficiency')
476 self.axisaxis.get_yaxis().set_label_text('Background Rejection')
477 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
478 return self
479
480
482 """
483 Plots multiple other plots into a grid 3x?
484 """
485
486 figure = None
487
488 axis = None
489
490 def __init__(self, cls, number_of_plots, figure=None):
491 """
492 Creates a new figure if None is given, sets the default plot parameters
493 @param figure default draw figure which is used
494 """
495 if figure is None:
496
497 self.figurefigurefigurefigure = matplotlib.figure.Figure(figsize=(32, 18))
498 self.figurefigurefigurefigure.set_tight_layout(True)
499 else:
500 self.figurefigurefigurefigure = figure
501
502 if number_of_plots == 1:
503 gs = matplotlib.gridspec.GridSpec(1, 1)
504 elif number_of_plots == 2:
505 gs = matplotlib.gridspec.GridSpec(1, 2)
506 elif number_of_plots == 3:
507 gs = matplotlib.gridspec.GridSpec(1, 3)
508 else:
509 gs = matplotlib.gridspec.GridSpec(int(numpy.ceil(number_of_plots / 3)), 3)
510
511
512 self.sub_plots = [cls(self.figurefigurefigurefigure, self.figurefigurefigurefigure.add_subplot(gs[i // 3, i % 3])) for i in range(number_of_plots)]
513
514 self.axisaxisaxisaxis = self.sub_plots[0].axis
516
517 def add(self, i, *args, **kwargs):
518 """
519 Call add function of ith subplot
520 @param i position of the subplot
521 """
522 self.sub_plots[i].add(*args, **kwargs)
523
524 def finish(self):
525 """
526 Sets limits, title, axis-labels and legend of the plot
527 """
528 for plot in self.sub_plots:
529 plot.finish()
530 return self
531
532
534 """
535 Plots the purity in each bin over the classifier output.
536 """
537
541
542 def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None):
543 """
544 Add a new curve to the Diagonal plot
545 @param data pandas.DataFrame containing all data
546 @param column which is used to calculate purity for different cuts
547 @param signal_mask boolean numpy.array defining which events are signal events
548 @param bckgrd_mask boolean numpy.array defining which events are background events
549 @param weight_column column in data containing the weights for each event
550 """
551 hists = histogram.Histograms(data, column, {'Signal': signal_mask, 'Background': bckgrd_mask}, weight_column=weight_column)
552 purity, purity_error = hists.get_purity_per_bin(['Signal'], ['Background'])
553
554 self.xmin, self.xmaxxmaxxmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmaxxmaxxmax)
555 # self.ymin, self.ymax = numpy.nanmin([numpy.nanmin(purity), self.ymin]), numpy.nanmax([numpy.nanmax(purity), self.ymax])
556 self.ymin, self.ymaxymaxymax = 0, 1
557
558 p = self._plot_datapoints(self.axisaxis, hists.bin_centers, purity, xerr=hists.bin_widths / 2.0, yerr=purity_error)
559 self.plotsplots.append(p)
560 self.labelslabels.append(column)
561 return self
562
563 def finish(self):
564 """
565 Sets limits, title, axis-labels and legend of the plot
566 """
567 self.scale_limits()
568 self.axisaxis.plot((0.0, 1.0), (0.0, 1.0), color='black')
569 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
570 self.axisaxis.set_ylim((self.ymin, self.ymaxymaxymax))
571 self.axisaxis.set_title("Diagonal Plot")
572 self.axisaxis.get_xaxis().set_label_text('Classifier Output')
573 self.axisaxis.get_yaxis().set_label_text('Purity Per Bin')
574 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
575 return self
576
577
579 """
580 Plots distribution of a quantity
581 """
582
583 def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False,
584 keep_first_binning=False, range_in_std=None):
585 """
586 Creates a new figure and axis if None is given, sets the default plot parameters
587 @param figure default draw figure which is used
588 @param axis default draw axis which is used
589 @param normed true if histograms should be normed before drawing
590 @param keep_first_binning use the binning of the first distribution for further plots
591 @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
592 """
593 super().__init__(figure, axis)
594
595 self.normed_to_all_entries = normed_to_all_entries
596
597 self.normed_to_bin_width = normed_to_bin_width
598
599 self.range_in_std = range_in_std
600 # if self.normed_to_all_entries or self.normed_to_bin_width:
601
602 self.yminymin = float(0)
603
604 self.ymaxymaxymax = float('-inf')
605
606 self.xminxmin = float('inf')
607
608 self.xmaxxmaxxmax = float('-inf')
609
610 self.keep_first_binning = keep_first_binning
611
612 self.first_binning = None
613
614 self.x_axis_label = ''
615
616 def add(self, data, column, mask=None, weight_column=None, label=None):
617 """
618 Add a new distribution to the plots
619 @param data pandas.DataFrame containing all data
620 @param column which is used to calculate distribution histogram
621 @param mask boolean numpy.array defining which events are used for the histogram
622 @param weight_column column in data containing the weights for each event
623 """
624 if mask is None:
625 mask = numpy.ones(len(data)).astype('bool')
626
627 bins = 100
628 if self.keep_first_binning and self.first_binning is not None:
629 bins = self.first_binning
630 hists = histogram.Histograms(data, column, {'Total': mask}, weight_column=weight_column,
631 bins=bins, equal_frequency=False, range_in_std=self.range_in_std)
632 if self.keep_first_binning and self.first_binning is None:
633 self.first_binning = hists.bins
634 hist, hist_error = hists.get_hist('Total')
635
636 if self.normed_to_all_entries:
637 normalization = float(numpy.sum(hist))
638 hist = hist / normalization
639 hist_error = hist_error / normalization
640
641 if self.normed_to_bin_width:
642 hist = hist / hists.bin_widths
643 hist_error = hist_error / hists.bin_widths
644
645 self.xminxmin, self.xmaxxmaxxmax = min(hists.bin_centers.min(), self.xminxmin), max(hists.bin_centers.max(), self.xmaxxmaxxmax)
646 self.yminymin = numpy.nanmin([hist.min(), self.yminymin])
647 self.ymaxymaxymax = numpy.nanmax([(hist + hist_error).max(), self.ymaxymaxymax])
648
649 p = self._plot_datapoints(self.axisaxis, hists.bin_centers, hist, xerr=hists.bin_widths / 2, yerr=hist_error)
650 self.plotsplots.append(p)
651 self.x_axis_label = column
652
653 appendix = ''
654 if self.ymaxymaxymax <= self.yminymin or self.xmaxxmaxxmax <= self.xminxmin:
655 appendix = ' No data to plot!'
656
657 if label is None:
658 self.labelslabels.append(column + appendix)
659 else:
660 self.labelslabels.append(label + appendix)
661 return self
662
663 def finish(self):
664 """
665 Sets limits, title, axis-labels and legend of the plot
666 """
667 self.axisaxis.set_title("Distribution Plot")
668 self.axisaxis.get_xaxis().set_label_text(self.x_axis_label)
669
670 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
671
672 if self.ymaxymaxymax <= self.yminymin or self.xmaxxmaxxmax <= self.xminxmin:
673 self.axisaxis.set_xlim((0., 1.))
674 self.axisaxis.set_ylim((0., 1.))
675 self.axisaxis.text(0.36, 0.5, 'No data to plot', fontsize=60, color='black')
676 return self
677
678 self.scale_limits()
679
680 self.axisaxis.set_xlim((self.xminxmin, self.xmaxxmaxxmax))
681 self.axisaxis.set_ylim((self.yminymin, self.ymaxymaxymax))
682
684 self.axisaxis.get_yaxis().set_label_text('# Entries per Bin / (# Entries * Bin Width)')
685 elif self.normed_to_all_entries:
686 self.axisaxis.get_yaxis().set_label_text('# Entries per Bin / # Entries')
687 elif self.normed_to_bin_width:
688 self.axisaxis.get_yaxis().set_label_text('# Entries per Bin / Bin Width')
689 else:
690 self.axisaxis.get_yaxis().set_label_text('# Entries per Bin')
691
692 return self
693
694
696 """
697 Create a boxplot
698 """
699
701
702 def __init__(self, figure=None, axis=None):
703 """
704 Creates a new figure and axis if None is given, sets the default plot parameters
705 @param figure default draw figure which is used
706 @param axis default draw axis which is used
707 """
708 super().__init__(figure=figure, axis=axis)
709
710
711 self.x_axis_label = ""
712
713 def add(self, data, column, mask=None, weight_column=None):
714 """
715 Add a new boxplot to the plots
716 @param data pandas.DataFrame containing all data
717 @param column which is used to calculate boxplot quantities
718 @param mask boolean numpy.array defining which events are used for the histogram
719 @param weight_column column in data containing the weights for each event
720 """
721 if mask is None:
722 mask = numpy.ones(len(data)).astype('bool')
723 x = data[column][mask]
724 if weight_column is not None:
725 # weight = data[weight_column][mask]
726 b2.B2WARNING("Weights are currently not used in boxplot, due to limitations in matplotlib")
727
728 if len(x) == 0:
729 b2.B2WARNING("Ignore empty boxplot.")
730 return self
731
732 p = self.axisaxis.boxplot(x, sym='k.', whis=1.5, vert=False, patch_artist=True, showmeans=True, widths=1,
733 boxprops=dict(facecolor='blue', alpha=0.5),
734 # medianprobs=dict(color='blue'),
735 # meanprobs=dict(color='red'),
736 )
737 self.plotsplots.append(p)
738 self.labelslabels.append(column)
739 self.x_axis_label = column
740 r"""
741 self.axisaxis.text(0.1, 0.9, (r'$ \mu = {:.2f}$' + '\n' + r'$median = {:.2f}$').format(x.mean(), x.median()),
742 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axisaxis.transAxes)
743 self.axisaxis.text(0.4, 0.9, (r'$ \sigma = {:.2f}$' + '\n' + r'$IQD = {:.2f}$').format(x.std(),
744 x.quantile(0.75) - x.quantile(0.25)),
745 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axisaxis.transAxes)
746 self.axisaxis.text(0.7, 0.9, (r'$min = {:.2f}$' + '\n' + r'$max = {:.2f}$').format(x.min(), x.max()),
747 fontsize=28, verticalalignment='top', horizontalalignment='left', transform=self.axisaxis.transAxes)
748 """
749
750 return self
751
752 def finish(self):
753 """
754 Sets limits, title, axis-labels and legend of the plot
755 """
756 matplotlib.artist.setp(self.axisaxis.get_yaxis(), visible=False)
757 self.axisaxis.get_xaxis().set_label_text(self.x_axis_label)
758 self.axisaxis.set_title("Box Plot")
759 return self
760
761
763 """
764 Plots the difference between two histograms
765 """
766
778
779 def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False):
780 """
781 Creates a new figure and axis if None is given, sets the default plot parameters
782 @param figure default draw figure which is used
783 @param axis default draw axis which is used
784 @param normed normalize minuend and subtrahend before comparing them
785 @param shift_to_zero mean difference is shifted to zero, to remove constant offset due to e.g. different sample sizes
786 """
787 super().__init__(figure, axis)
788 self.normed = normed
789 self.shift_to_zero = shift_to_zero
790 if self.normed:
791 self.yminymin = -0.01
792 self.ymaxymaxymax = 0.01
793 else:
794 self.yminymin = -1
795 self.ymaxymaxymax = 1
796
797 def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None):
798 """
799 Add a new difference plot
800 @param data pandas.DataFrame containing all data
801 @param column which is used to calculate distribution histogram
802 @param minuend_mask boolean numpy.array defining which events are for the minuend histogram
803 @param subtrahend_mask boolean numpy.array defining which events are for the subtrahend histogram
804 @param weight_column column in data containing the weights for each event
805 @param label label for the legend if None, the column name is used
806 """
807 hists = histogram.Histograms(data, column, {'Minuend': minuend_mask, 'Subtrahend': subtrahend_mask},
808 weight_column=weight_column, equal_frequency=False)
809 minuend, minuend_error = hists.get_hist('Minuend')
810 subtrahend, subtrahend_error = hists.get_hist('Subtrahend')
811
812 difference_error = histogram.poisson_error(minuend + subtrahend)
813 if self.normed:
814 difference_error = difference_error / (numpy.sum(minuend) + numpy.sum(subtrahend))
815 minuend = minuend / numpy.sum(minuend)
816 subtrahend = subtrahend / numpy.sum(subtrahend)
817 difference = minuend - subtrahend
818
819 if self.shift_to_zero:
820 difference = difference - numpy.mean(difference)
821
822 self.xmin, self.xmaxxmaxxmax = min(hists.bin_centers.min(), self.xmin), max(hists.bin_centers.max(), self.xmaxxmaxxmax)
823 self.yminymin = min((difference - difference_error).min(), self.yminymin)
824 self.ymaxymaxymax = max((difference + difference_error).max(), self.ymaxymaxymax)
825
826 p = self._plot_datapoints(self.axisaxis, hists.bin_centers, difference, xerr=hists.bin_widths / 2, yerr=difference_error)
827 self.plotsplots.append(p)
828 if label is None:
829 self.labelslabels.append(label)
830 else:
831 self.labelslabels.append(column)
832 self.x_axis_label = column
833 return self
834
835 def finish(self, line_color='black'):
836 """
837 Sets limits, title, axis-labels and legend of the plot
838 """
839 self.axisaxis.plot((self.xmin, self.xmaxxmaxxmax), (0, 0), color=line_color, linewidth=4, rasterized=True)
840 self.scale_limits()
841 self.axisaxis.set_xlim((self.xmin, self.xmaxxmaxxmax))
842 self.axisaxis.set_ylim((self.yminymin, self.ymaxymaxymax))
843 self.axisaxis.set_title("Difference Plot")
844 self.axisaxis.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(5))
845 self.axisaxis.get_xaxis().set_label_text(self.x_axis_label)
846 self.axisaxis.get_yaxis().set_label_text('Difference')
847 self.axisaxis.legend([x[0] for x in self.plotsplots], self.labelslabels, loc='best', fancybox=True, framealpha=0.5)
848 return self
849
850
852 """
853 Create TMVA-like overtraining control plot for a classification training
854 """
855
856
857 figure = None
858
859 axis = None
860
861 axis_d1 = None
862
863 axis_d2 = None
864
865 def __init__(self, figure=None):
866 """
867 Creates a new figure if None is given, sets the default plot parameters
868 @param figure default draw figure which is used
869 """
870 if figure is None:
871
872 self.figurefigurefigurefigure = matplotlib.figure.Figure(figsize=(32, 18))
873 self.figurefigurefigurefigure.set_tight_layout(True)
874 else:
875 self.figurefigurefigurefigure = figure
876
877 gs = matplotlib.gridspec.GridSpec(5, 1)
878
880
882
884
886
887 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
888 """
889 Add a new overtraining plot, I recommend to draw only one overtraining plot at the time,
890 otherwise there are too many curves in the plot to recognize anything in the plot.
891 @param data pandas.DataFrame containing all data
892 @param column which is used to calculate distribution histogram
893 @param train_mask boolean numpy.array defining which events are training events
894 @param test_mask boolean numpy.array defining which events are test events
895 @param signal_mask boolean numpy.array defining which events are signal events
896 @param bckgrd_mask boolean numpy.array defining which events are background events
897 @param weight_column column in data containing the weights for each event
898 """
899 distribution = Distribution(self.figurefigurefigurefigure, self.axisaxisaxisaxis, normed_to_all_entries=True)
900
901 distribution.set_plot_options(self.plot_kwargs)
902 distribution.set_errorbar_options(self.errorbar_kwargs)
903 distribution.set_errorband_options(self.errorband_kwargs)
904 distribution.add(data, column, test_mask & signal_mask, weight_column)
905 distribution.add(data, column, test_mask & bckgrd_mask, weight_column)
906
907 distribution.set_plot_options(
908 {'color': distribution.plots[0][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
909 distribution.set_fill_options({'color': distribution.plots[0][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
910 distribution.set_errorbar_options(None)
911 distribution.set_errorband_options(None)
912 distribution.add(data, column, train_mask & signal_mask, weight_column)
913 distribution.set_plot_options(
914 {'color': distribution.plots[1][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
915 distribution.set_fill_options({'color': distribution.plots[1][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
916 distribution.add(data, column, train_mask & bckgrd_mask, weight_column)
917
918 distribution.labels = ['Test-Signal', 'Test-Background', 'Train-Signal', 'Train-Background']
919 distribution.finish()
920
921 self.plot_kwargs['color'] = distribution.plots[0][0][0].get_color()
922 difference_signal = Difference(self.figurefigurefigurefigure, self.axis_d1axis_d1, shift_to_zero=True, normed=True)
923 difference_signal.set_plot_options(self.plot_kwargs)
924 difference_signal.set_errorbar_options(self.errorbar_kwargs)
925 difference_signal.set_errorband_options(self.errorband_kwargs)
926 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column)
927 self.axis_d1axis_d1.set_xlim((difference_signal.xmin, difference_signal.xmax))
928 self.axis_d1axis_d1.set_ylim((difference_signal.ymin, difference_signal.ymax))
929 difference_signal.plots = difference_signal.labels = []
930 difference_signal.finish(line_color=distribution.plots[0][0][0].get_color())
931
932 self.plot_kwargs['color'] = distribution.plots[1][0][0].get_color()
933 difference_bckgrd = Difference(self.figurefigurefigurefigure, self.axis_d2axis_d2, shift_to_zero=True, normed=True)
934 difference_bckgrd.set_plot_options(self.plot_kwargs)
935 difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
936 difference_bckgrd.set_errorband_options(self.errorband_kwargs)
937 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column)
938 self.axis_d2axis_d2.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
939 self.axis_d2axis_d2.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
940 difference_bckgrd.plots = difference_bckgrd.labels = []
941 difference_bckgrd.finish(line_color=distribution.plots[1][0][0].get_color())
942
943 try:
944 import scipy.stats
945 # Kolmogorov smirnov test
946 if len(data[column][train_mask & signal_mask]) == 0 or len(data[column][test_mask & signal_mask]) == 0:
947 b2.B2WARNING("Cannot calculate kolmogorov smirnov test for signal due to missing data")
948 else:
949 ks = scipy.stats.ks_2samp(data[column][train_mask & signal_mask], data[column][test_mask & signal_mask])
950 props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
951 self.axis_d1axis_d1.text(0.1, 0.9, r'signal (train - test) difference $p={:.2f}$'.format(ks[1]), fontsize=36, bbox=props,
952 verticalalignment='top', horizontalalignment='left', transform=self.axis_d1axis_d1.transAxes)
953 if len(data[column][train_mask & bckgrd_mask]) == 0 or len(data[column][test_mask & bckgrd_mask]) == 0:
954 b2.B2WARNING("Cannot calculate kolmogorov smirnov test for background due to missing data")
955 else:
956 ks = scipy.stats.ks_2samp(data[column][train_mask & bckgrd_mask], data[column][test_mask & bckgrd_mask])
957 props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
958 self.axis_d2axis_d2.text(0.1, 0.9, r'background (train - test) difference $p={:.2f}$'.format(ks[1]), fontsize=36,
959 bbox=props,
960 verticalalignment='top', horizontalalignment='left', transform=self.axis_d2axis_d2.transAxes)
961 except ImportError:
962 b2.B2WARNING("Cannot calculate kolmogorov smirnov test please install scipy!")
963
964 return self
965
966 def finish(self):
967 """
968 Sets limits, title, axis-labels and legend of the plot
969 """
970 self.axisaxisaxisaxis.set_title("Overtraining Plot")
971 self.axis_d1axis_d1.set_title("")
972 self.axis_d2axis_d2.set_title("")
973 matplotlib.artist.setp(self.axisaxisaxisaxis.get_xticklabels(), visible=False)
974 matplotlib.artist.setp(self.axis_d1axis_d1.get_xticklabels(), visible=False)
975 self.axisaxisaxisaxis.get_xaxis().set_label_text('')
976 self.axis_d1axis_d1.get_xaxis().set_label_text('')
977 self.axis_d2axis_d2.get_xaxis().set_label_text('Classifier Output')
978 return self
979
980
982 """
983 Plots distribution of a quantity including boxplots
984 """
985
986
987 box_axes = None
988
989 def __init__(self, figure=None, axis=None, normed=False, range_in_std=None):
990 """
991 Creates a new figure and axis if None is given, sets the default plot parameters
992 @param figure default draw figure which is used
993 @param axis default draw axis which is used
994 @param normed true if the histograms should be normed before drawing
995 @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
996 """
997 super().__init__(figure, axis)
998
999 self.normed = normed
1000
1001 self.range_in_std = range_in_std
1002
1004
1005 self.distribution = Distribution(self.figurefigure, self.axisaxis, normed_to_all_entries=self.normed, range_in_std=self.range_in_std)
1006
1007 def add(self, data, column, mask=None, weight_column=None, label=None):
1008 """
1009 Add a new distribution plot, with additional information like a boxplot compared to
1010 the ordinary Distribution plot.
1011 @param data pandas.DataFrame containing all data
1012 @param column which is used to calculate distribution histogram
1013 @param mask boolean numpy.array defining which events are used for the distribution histogram
1014 @param weight_column column in data containing the weights for each event
1015 """
1019 self.distribution.add(data, column, mask, weight_column, label=label)
1020
1021 n = len(self.box_axesbox_axes) + 1
1022 gs = matplotlib.gridspec.GridSpec(4 * n, 1)
1023 gridspecs = [gs[:3 * n, :]] + [gs[3 * n + i, :] for i in range(n)]
1024 box_axis = self.add_subplot(gridspecs)
1025
1026 if self.range_in_std is not None:
1027 mean, std = histogram.weighted_mean_and_std(data[column], None if weight_column is None else data[weight_column])
1028 # Everything outside mean +- range_in_std * std is considered not inside the mask
1029 mask = mask & (data[column] > (mean - self.range_in_std * std)) & (data[column] < (mean + self.range_in_std * std))
1030 box = Box(self.figurefigure, box_axis)
1031 box.add(data, column, mask, weight_column)
1032 if len(box.plots) > 0:
1033 box.plots[0]['boxes'][0].set_facecolor(self.distribution.plots[-1][0][0].get_color())
1034 box.finish()
1035
1036 self.box_axesbox_axes.append(box_axis)
1037 return self
1038
1039 def finish(self):
1040 """
1041 Sets limits, title, axis-labels and legend of the plot
1042 """
1043 self.distribution.finish()
1044 matplotlib.artist.setp(self.axisaxis.get_xticklabels(), visible=False)
1045 self.axisaxis.get_xaxis().set_label_text('')
1046 for box_axis in self.box_axesbox_axes[:-1]:
1047 matplotlib.artist.setp(box_axis.get_xticklabels(), visible=False)
1048 box_axis.set_title("")
1049 box_axis.get_xaxis().set_label_text('')
1050 self.box_axesbox_axes[-1].set_title("")
1051 self.axisaxis.set_title("Distribution Plot")
1052 self.axisaxis.legend([x[0] for x in self.distribution.plots], self.distribution.labels,
1053 loc='best', fancybox=True, framealpha=0.5)
1054 return self
1055
1056
1058 """
1059 Plots change of a distribution of a quantity depending on the cut on a classifier
1060 """
1061
1062 figure = None
1063
1064 axis = None
1065
1066 axis_d1 = None
1067
1068 axis_d2 = None
1069
1070 def __init__(self, figure=None):
1071 """
1072 Creates a new figure if None is given, sets the default plot parameters
1073 @param figure default draw figure which is used
1074 """
1075 if figure is None:
1076
1077 self.figurefigurefigurefigure = matplotlib.figure.Figure(figsize=(32, 18))
1078 self.figurefigurefigurefigure.set_tight_layout(True)
1079 else:
1080 self.figurefigurefigurefigure = figure
1081
1082 gs = matplotlib.gridspec.GridSpec(3, 2)
1083
1085
1087
1089
1091
1092 def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None):
1093 """
1094 Add a new correlation plot.
1095 @param data pandas.DataFrame containing all data
1096 @param column which is used to calculate distribution histogram
1097 @param cut_column which is used to calculate cut on the other quantity defined by column
1098 @param quantiles list of quantiles between 0 and 100, defining the different cuts
1099 @param weight_column column in data containing the weights for each event
1100 """
1101 if len(data[cut_column]) == 0:
1102 b2.B2WARNING("Ignore empty Correlation.")
1103 return self
1104
1105 axes = [self.axisaxisaxisaxis, self.axis_d1axis_d1, self.axis_d2axis_d2]
1106
1107 for i, (l, m) in enumerate([('.', signal_mask | bckgrd_mask), ('S', signal_mask), ('B', bckgrd_mask)]):
1108
1109 if weight_column is not None:
1110 weights = numpy.array(data[weight_column][m])
1111 else:
1112 weights = numpy.ones(len(data[column][m]))
1113
1114 xrange = numpy.percentile(data[column][m], [5, 95])
1115
1116 colormap = plt.get_cmap('coolwarm')
1117 tmp, x = numpy.histogram(data[column][m], bins=100,
1118 range=xrange, density=True, weights=weights)
1119 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1120 axes[i].plot(bin_center, tmp, color='black', lw=1)
1121
1122 for quantil in numpy.arange(5, 100, 5):
1123 cut = numpy.percentile(data[cut_column][m], quantil)
1124 sel = data[cut_column][m] >= cut
1125 y, x = numpy.histogram(data[column][m][sel], bins=100,
1126 range=xrange, density=True, weights=weights[sel])
1127 bin_center = ((x + numpy.roll(x, 1)) / 2)[1:]
1128 axes[i].fill_between(bin_center, tmp, y, color=colormap(quantil / 100.0))
1129 tmp = y
1130
1131 axes[i].set_ylim(bottom=0)
1132
1133 flatness_score = basf2_mva_util.calculate_flatness(data[column][m], data[cut_column][m], weights)
1134 axes[i].set_title(r'Distribution for different quantiles: $\mathrm{{Flatness}}_{} = {:.3f}$'.format(l, flatness_score))
1135 return self
1136
1137 def finish(self):
1138 """
1139 Sets limits, title, axis-labels and legend of the plot
1140 """
1141 return self
1142
1143
1145 """
1146 Plots multivariate distribution using TSNE algorithm
1147 """
1148
1149 def add(self, data, columns, *masks):
1150 """
1151 Add a new correlation plot.
1152 @param data pandas.DataFrame containing all data
1153 @param columns which are used to calculate the correlations
1154 @param masks different classes to show in TSNE
1155 """
1156 try:
1157 import sklearn
1158 import sklearn.manifold
1159 model = sklearn.manifold.TSNE(n_components=2, random_state=0)
1160 data = numpy.array([data[column] for column in columns]).T
1161 model.fit(data)
1162 for mask in masks:
1163 data = numpy.array([data[column][mask] for column in columns]).T
1164 data = model.transform(data)
1165 self.axisaxis.scatter(data[:, 0], data[:, 1], rasterized=True)
1166 except ImportError:
1167 print("Cannot create TSNE plot. Install sklearn if you want it")
1168 return self
1169
1170 def finish(self):
1171 """
1172 Sets limits, title, axis-labels and legend of the plot
1173 """
1174 return self
1175
1176
1178 """
1179 Plots importance matrix
1180 """
1181
1182 def add(self, data, columns, variables):
1183 """
1184 Add a new correlation plot.
1185 @param data pandas.DataFrame containing all data
1186 @param columns which are used to calculate the correlations
1187 """
1188 self.figurefigure.set_tight_layout(True)
1189
1190 def norm(x):
1191 width = (numpy.max(x) - numpy.min(x))
1192 if width <= 0:
1193 return numpy.zeros(x.shape)
1194 return (x - numpy.min(x)) / width * 100
1195
1196 importance_matrix = numpy.vstack([norm(data[column]) for column in columns]).T
1197 importance_heatmap = self.axisaxis.pcolor(importance_matrix, cmap=plt.cm.RdBu, vmin=0.0, vmax=100,
1198 rasterized=True)
1199
1200 # put the major ticks at the middle of each cell
1201 self.axisaxis.set_yticks(numpy.arange(importance_matrix.shape[0]) + 0.5, minor=False)
1202 self.axisaxis.set_xticks(numpy.arange(importance_matrix.shape[1]) + 0.5, minor=False)
1203
1204 self.axisaxis.set_xticklabels(columns, minor=False, rotation=90)
1205 self.axisaxis.set_yticklabels(variables, minor=False)
1206
1207 self.axisaxis.xaxis.tick_top()
1208
1209 for y in range(importance_matrix.shape[0]):
1210 for x in range(importance_matrix.shape[1]):
1211 txt = self.axisaxis.text(x + 0.5, y + 0.5, f'{importance_matrix[y, x]:.0f}',
1212 size=14,
1213 horizontalalignment='center',
1214 verticalalignment='center',
1215 color='w')
1216 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])
1217
1218 cb = self.figurefigure.colorbar(importance_heatmap, ticks=[0.0, 100], orientation='vertical')
1219 cb.ax.set_yticklabels(['low', 'high'])
1220
1221 # remove whitespace
1222 self.axisaxis.set_ylim(0, importance_matrix.shape[0])
1223
1224 self.axisaxis.set_aspect('equal')
1225
1226 return self
1227
1228 def finish(self):
1229 """
1230 Sets limits, title, axis-labels and legend of the plot
1231 """
1232 return self
1233
1234
1236 """
1237 Plots correlation matrix
1238 """
1239
1240 figure = None
1241
1242 signal_axis = None
1243
1244 bckgrd_axis = None
1245
1246 def __init__(self, figure=None):
1247 """
1248 Creates a new figure if None is given, sets the default plot parameters
1249 @param figure default draw figure which is used
1250 """
1251 if figure is None:
1252
1253 self.figurefigurefigurefigure = matplotlib.figure.Figure(figsize=(32, 18))
1254 self.figurefigurefigurefigure.set_tight_layout(True)
1255 else:
1256 self.figurefigurefigurefigure = figure
1257
1258 gs = matplotlib.gridspec.GridSpec(8, 2)
1259
1261
1263
1265
1267
1269
1270 def add(self, data, columns, signal_mask, bckgrd_mask):
1271 """
1272 Add a new correlation plot.
1273 @param data pandas.DataFrame containing all data
1274 @param columns which are used to calculate the correlations
1275 """
1276 signal_corr = numpy.corrcoef(numpy.vstack([data[column][signal_mask] for column in columns])) * 100
1277 bckgrd_corr = numpy.corrcoef(numpy.vstack([data[column][bckgrd_mask] for column in columns])) * 100
1278
1279 signal_heatmap = self.signal_axissignal_axis.pcolor(signal_corr, cmap=plt.cm.RdBu, vmin=-100.0, vmax=100.0)
1280 self.bckgrd_axisbckgrd_axis.pcolor(bckgrd_corr, cmap=plt.cm.RdBu, vmin=-100.0, vmax=100.0)
1281
1282 self.signal_axissignal_axis.invert_yaxis()
1283 self.signal_axissignal_axis.xaxis.tick_top()
1284 self.bckgrd_axisbckgrd_axis.invert_yaxis()
1285 self.bckgrd_axisbckgrd_axis.xaxis.tick_top()
1286
1287 # put the major ticks at the middle of each cell
1288 self.signal_axissignal_axis.set_xticks(numpy.arange(signal_corr.shape[0]) + 0.5, minor=False)
1289 self.signal_axissignal_axis.set_yticks(numpy.arange(signal_corr.shape[1]) + 0.5, minor=False)
1290
1291 self.signal_axissignal_axis.set_xticklabels(columns, minor=False, rotation=90)
1292 self.signal_axissignal_axis.set_yticklabels(columns, minor=False)
1293
1294 # put the major ticks at the middle of each cell
1295 self.bckgrd_axisbckgrd_axis.set_xticks(numpy.arange(bckgrd_corr.shape[0]) + 0.5, minor=False)
1296 self.bckgrd_axisbckgrd_axis.set_yticks(numpy.arange(bckgrd_corr.shape[1]) + 0.5, minor=False)
1297
1298 self.bckgrd_axisbckgrd_axis.set_xticklabels(columns, minor=False, rotation=90)
1299 self.bckgrd_axisbckgrd_axis.set_yticklabels(columns, minor=False)
1300
1301 for y in range(signal_corr.shape[0]):
1302 for x in range(signal_corr.shape[1]):
1303 txt = self.signal_axissignal_axis.text(x + 0.5, y + 0.5, f'{signal_corr[y, x]:.0f}',
1304 size=14,
1305 horizontalalignment='center',
1306 verticalalignment='center',
1307 color='w')
1308 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])
1309
1310 for y in range(bckgrd_corr.shape[0]):
1311 for x in range(bckgrd_corr.shape[1]):
1312 txt = self.bckgrd_axisbckgrd_axis.text(x + 0.5, y + 0.5, f'{bckgrd_corr[y, x]:.0f}',
1313 size=14,
1314 horizontalalignment='center',
1315 verticalalignment='center',
1316 color='w')
1317 txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='k')])
1318
1319 cb = self.figurefigurefigurefigure.colorbar(signal_heatmap, cax=self.colorbar_axis, ticks=[-100, 0, 100], orientation='horizontal')
1320 cb.solids.set_rasterized(True)
1321 cb.ax.set_xticklabels(['negative', 'uncorrelated', 'positive'])
1322
1323 self.signal_axissignal_axis.text(0.5, -1.0, "Signal", horizontalalignment='center')
1324 self.bckgrd_axisbckgrd_axis.text(0.5, -1.0, "Background", horizontalalignment='center')
1325
1326 # remove whitespace
1327 self.signal_axissignal_axis.set_xlim(0, signal_corr.shape[0])
1328 self.signal_axissignal_axis.set_ylim(0, signal_corr.shape[1])
1329 self.bckgrd_axisbckgrd_axis.set_xlim(0, bckgrd_corr.shape[0])
1330 self.bckgrd_axisbckgrd_axis.set_ylim(0, bckgrd_corr.shape[1])
1331 return self
1332
1333 def finish(self):
1334 """
1335 Sets limits, title, axis-labels and legend of the plot
1336 """
1337 matplotlib.artist.setp(self.bckgrd_axisbckgrd_axis.get_yticklabels(), visible=False)
1338 return self
1339
1340
1341if __name__ == '__main__':
1342
1343 def get_data(N, columns):
1344 """
1345 Creates fake data for example plots
1346 """
1347 N /= 2
1348 n = len(columns) - 1
1349 xs = numpy.random.normal(0, size=(N, n))
1350 xb = numpy.random.normal(1, size=(N, n))
1351 ys = numpy.zeros(N)
1352 yb = numpy.ones(N)
1353 data = pandas.DataFrame(numpy.c_[numpy.r_[xs, xb], numpy.r_[ys, yb]], columns=columns)
1354 return data.reindex(numpy.random.permutation(data.index))
1355
1356 import seaborn
1357 # Set nice searborn settings
1358 seaborn.set(font_scale=3)
1359 seaborn.set_style('whitegrid')
1360
1361 # Standard plots
1362 N = 100000
1363 data = get_data(N, columns=['FastBDT', 'NeuroBayes', 'isSignal'])
1364 data['type'] = ''
1365 data.type.iloc[:N / 2] = 'Train'
1366 data.type.iloc[N / 2:] = 'Test'
1367
1368 p = Box()
1369 p.add(data, 'FastBDT')
1370 p.finish()
1371 p.save('box_plot.png')
1372
1374 p.add(data, 'FastBDT')
1375 p.add(data, 'NeuroBayes')
1376 p.finish()
1377 p.save('verbose_distribution_plot.png')
1378
1380 p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
1381 p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
1382 p.finish()
1383 p.save('roc_purity_plot.png')
1384
1386 p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
1387 p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
1388 p.finish()
1389 p.save('roc_rejection_plot.png')
1390
1391 p = Diagonal()
1392 p.add(data, 'FastBDT', data['isSignal'] == 1, data['isSignal'] == 0)
1393 p.add(data, 'NeuroBayes', data['isSignal'] == 1, data['isSignal'] == 0)
1394 p.finish()
1395 p.save('diagonal_plot.png')
1396
1397 p = Distribution()
1398 p.add(data, 'FastBDT')
1399 p.add(data, 'NeuroBayes')
1400 p.finish()
1401 p.save('distribution_plot.png')
1402
1403 p = Difference()
1404 p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test')
1405 p.add(data, 'NeuroBayes', data['type'] == 'Train', data['type'] == 'Test')
1406 p.finish()
1407 p.save('difference_plot.png')
1408
1409 p = Overtraining()
1410 p.add(data, 'FastBDT', data['type'] == 'Train', data['type'] == 'Test', data['isSignal'] == 1, data['isSignal'] == 0)
1411 p.finish()
1412 p.save('overtraining_plot.png')
1413
1414 p = Correlation()
1415 p.add(data, 'FastBDT', 'NeuroBayes', [0, 20, 40, 60, 80, 100], data['isSignal'] == 0)
1416 p.finish()
1417 p.save('correlation_plot.png')
1418
1419 p = CorrelationMatrix()
1420 data['FastBDT2'] = data['FastBDT']**2
1421 data['NeuroBayes2'] = data['NeuroBayes']**2
1422 data['FastBDT3'] = data['FastBDT']**3
1423 data['NeuroBayes3'] = data['NeuroBayes']**3
1424 p.add(data, ['FastBDT', 'NeuroBayes', 'FastBDT2', 'NeuroBayes2', 'FastBDT3', 'NeuroBayes3'])
1425 p.finish()
1426 p.save('correlation_matrix.png')
def calculate_flatness(f, p, w=None)
x_axis_label
Label on x axis.
Definition: plotting.py:711
def add(self, data, column, mask=None, weight_column=None)
Definition: plotting.py:713
def __init__(self, figure=None, axis=None)
Definition: plotting.py:702
def finish(self)
Definition: plotting.py:752
signal_axis
add signal subplot
Definition: plotting.py:1260
def add(self, data, columns, signal_mask, bckgrd_mask)
Definition: plotting.py:1270
colorbar_axis
Colorbar axis contains the colorbar.
Definition: plotting.py:1264
None bckgrd_axis
Axis which shows the correlation of the background samples.
Definition: plotting.py:1244
def __init__(self, figure=None)
Definition: plotting.py:1246
None figure
figure which is used to draw
Definition: plotting.py:1240
None signal_axis
Main axis which shows the correlation of the signal samples.
Definition: plotting.py:1242
bckgrd_axis
add background subplot
Definition: plotting.py:1262
axis
Usual axis object which every Plotter object needs, here it is just a dummy.
Definition: plotting.py:1266
def add(self, data, column, cut_column, quantiles, signal_mask=None, bckgrd_mask=None, weight_column=None)
Definition: plotting.py:1092
axis_d1
define second subplot
Definition: plotting.py:1086
figure
create figure
Definition: plotting.py:1077
None axis_d1
Axis which shows shape of signal.
Definition: plotting.py:1066
None axis
Main axis which is used to draw.
Definition: plotting.py:1064
def __init__(self, figure=None)
Definition: plotting.py:1070
axis_d2
define third subplot
Definition: plotting.py:1088
None figure
figure which is used to draw
Definition: plotting.py:1062
None axis_d2
Axis which shows shape of background.
Definition: plotting.py:1068
axis
define first subplot
Definition: plotting.py:1084
ymax
Maximum y value.
Definition: plotting.py:556
xmax
Maximum x value.
Definition: plotting.py:554
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None)
Definition: plotting.py:542
def finish(self)
Definition: plotting.py:563
x_axis_label
Label on x axis.
Definition: plotting.py:832
shift_to_zero
Mean difference is shifted to zero (removes constant offset) if this is true.
Definition: plotting.py:789
def __init__(self, figure=None, axis=None, normed=False, shift_to_zero=False)
Definition: plotting.py:779
ymax
Maximum y value.
Definition: plotting.py:792
xmax
Maximum x value.
Definition: plotting.py:822
def add(self, data, column, minuend_mask, subtrahend_mask, weight_column=None, label=None)
Definition: plotting.py:797
ymin
min y value
Definition: plotting.py:791
def finish(self, line_color='black')
Definition: plotting.py:835
normed
Minuend and subtrahend are normed before comparing them if this is true.
Definition: plotting.py:788
def __init__(self, figure=None, axis=None, normed_to_all_entries=False, normed_to_bin_width=False, keep_first_binning=False, range_in_std=None)
Definition: plotting.py:584
def add(self, data, column, mask=None, weight_column=None, label=None)
Definition: plotting.py:616
x_axis_label
x axis label
Definition: plotting.py:614
keep_first_binning
Keep first binning if user wants so.
Definition: plotting.py:610
normed_to_all_entries
Normalize histograms before drawing them.
Definition: plotting.py:595
first_binning
first binning
Definition: plotting.py:612
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition: plotting.py:599
normed_to_bin_width
Normalize histograms before drawing them.
Definition: plotting.py:597
def add(self, data, columns, variables)
Definition: plotting.py:1182
def finish(self)
Definition: plotting.py:1228
def add(self, i, *args, **kwargs)
Definition: plotting.py:517
figure
create figure
Definition: plotting.py:497
def __init__(self, cls, number_of_plots, figure=None)
Definition: plotting.py:490
None axis
Main axis.
Definition: plotting.py:488
None figure
figure which is used to draw
Definition: plotting.py:486
sub_plots
the subplots which are displayed in the grid
Definition: plotting.py:512
axis
the axis of the first subplot
Definition: plotting.py:514
def finish(self)
Definition: plotting.py:524
axis_d1
define second subplot
Definition: plotting.py:881
figure
create figure
Definition: plotting.py:872
def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
Definition: plotting.py:887
None axis_d1
Axis which shows the difference between training and test signal.
Definition: plotting.py:861
None axis
Main axis which is used to draw.
Definition: plotting.py:859
def __init__(self, figure=None)
Definition: plotting.py:865
axis_d2
define third subplot
Definition: plotting.py:883
None figure
figure which is used to draw
Definition: plotting.py:857
None axis_d2
Axis which shows the difference between training and test background.
Definition: plotting.py:863
axis
define first subplot
Definition: plotting.py:879
def finish(self, *args, **kwargs)
Definition: plotting.py:253
fill_kwargs
Default keyword arguments for fill_between function.
Definition: plotting.py:117
None ymin
Minimum y value.
Definition: plotting.py:67
def set_errorband_options(self, errorband_kwargs={ 'alpha':0.5})
Definition: plotting.py:163
plots
create empty list for plots
Definition: plotting.py:98
float xscale
limit scale
Definition: plotting.py:71
figure
create figure
Definition: plotting.py:86
None ymax
Maximum y value.
Definition: plotting.py:69
errorband_kwargs
Default keyword arguments for errorband function.
Definition: plotting.py:115
None axis
Main axis which is used to draw.
Definition: plotting.py:75
def scale_limits(self)
Definition: plotting.py:259
def add(self, *args, **kwargs)
Definition: plotting.py:247
None xmin
Minimum x value.
Definition: plotting.py:63
def set_fill_options(self, fill_kwargs=None)
Definition: plotting.py:171
def save(self, filename)
Definition: plotting.py:136
def __init__(self, figure=None, axis=None)
Definition: plotting.py:77
None figure
figure which is used to draw
Definition: plotting.py:73
ymax
set y limits
Definition: plotting.py:104
None plots
Plots added to the axis so far.
Definition: plotting.py:59
xmax
set x limits
Definition: plotting.py:102
errorbar_kwargs
Default keyword arguments for errorbar function.
Definition: plotting.py:113
labels
create empty list for labels
Definition: plotting.py:100
axis
divide figure into subplots
Definition: plotting.py:93
def _plot_datapoints(self, axis, x, y, xerr=None, yerr=None)
Definition: plotting.py:179
def set_errorbar_options(self, errorbar_kwargs={ 'fmt':'.', 'elinewidth':3, 'alpha':1})
Overrides default errorbar options for datapoint errorbars.
Definition: plotting.py:155
float yscale
limit scale
Definition: plotting.py:70
None labels
Labels of the plots added so far.
Definition: plotting.py:61
def add_subplot(self, gridspecs)
Definition: plotting.py:124
def set_plot_options(self, plot_kwargs={ 'linestyle':''})
Definition: plotting.py:147
plot_kwargs
Default keyword arguments for plot function.
Definition: plotting.py:111
None xmax
Maximum x value.
Definition: plotting.py:65
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
Definition: plotting.py:279
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition: plotting.py:388
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, label=None)
Definition: plotting.py:434
def add(self, data, column, signal_mask, bckgrd_mask, weight_column=None, normed=True)
Definition: plotting.py:341
def add(self, data, columns, *masks)
Definition: plotting.py:1149
def finish(self)
Definition: plotting.py:1170
def add(self, data, column, mask=None, weight_column=None, label=None)
Definition: plotting.py:1007
distribution
The distribution plot.
Definition: plotting.py:1005
range_in_std
Show only a certain range in terms of standard deviations of the data.
Definition: plotting.py:1001
None box_axes
Axes for the boxplots.
Definition: plotting.py:987
box_axes
create empty list for box axes
Definition: plotting.py:1003
normed
Normalize histograms before drawing them.
Definition: plotting.py:999
def __init__(self, figure=None, axis=None, normed=False, range_in_std=None)
Definition: plotting.py:989
def weighted_mean_and_std(x, w)
Definition: histogram.py:31
def poisson_error(n_tot)
Definition: histogram.py:24
Definition: plot.py:1