Belle II Software development
Overtraining Class Reference
Inheritance diagram for Overtraining:
Plotter

Public Member Functions

def __init__ (self, figure=None)
 
def add (self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
 
def finish (self)
 

Public Attributes

 figure
 create figure
 
 axis
 define first subplot
 
 axis_d1
 define second subplot
 
 axis_d2
 define third subplot
 

Static Public Attributes

None figure = None
 figure which is used to draw
 
None axis = None
 Main axis which is used to draw.
 
None axis_d1 = None
 Axis which shows the difference between training and test signal.
 
None axis_d2 = None
 Axis which shows the difference between training and test background.
 

Detailed Description

Creates a TMVA-like overtraining control plot for a classification training.

Definition at line 859 of file plotting.py.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
  figure = None 
)
Creates a new figure if None is given and sets the default plot parameters.
@param figure figure to draw into; if None, a new figure is created

Reimplemented from Plotter.

Definition at line 873 of file plotting.py.

873 def __init__(self, figure=None):
874 """
875 Creates a new figure if None is given, sets the default plot parameters
876 @param figure default draw figure which is used
877 """
878 if figure is None:
879
880 self.figure = matplotlib.figure.Figure(figsize=(32, 18))
881 self.figure.set_tight_layout(True)
882 else:
883 self.figure = figure
884
885 gs = matplotlib.gridspec.GridSpec(5, 1)
886
887 self.axis = self.figure.add_subplot(gs[:3, :])
888
889 self.axis_d1 = self.figure.add_subplot(gs[3, :], sharex=self.axis)
890
891 self.axis_d2 = self.figure.add_subplot(gs[4, :], sharex=self.axis)
892
893 super().__init__(self.figure, self.axis)
894

Member Function Documentation

◆ add()

def add (   self,
  data,
  column,
  train_mask,
  test_mask,
  signal_mask,
  bckgrd_mask,
  weight_column = None 
)
Adds a new overtraining plot. It is recommended to draw only one overtraining plot at a time;
otherwise there are too many curves in the plot to recognize anything.
@param data pandas.DataFrame containing all data
@param column column of data which is used to calculate the distribution histogram
@param train_mask boolean numpy.array defining which events are training events
@param test_mask boolean numpy.array defining which events are test events
@param signal_mask boolean numpy.array defining which events are signal events
@param bckgrd_mask boolean numpy.array defining which events are background events
@param weight_column column in data containing the weights for each event

Reimplemented from Plotter.

Definition at line 895 of file plotting.py.

895 def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
896 """
897 Add a new overtraining plot, I recommend to draw only one overtraining plot at the time,
898 otherwise there are too many curves in the plot to recognize anything in the plot.
899 @param data pandas.DataFrame containing all data
900 @param column which is used to calculate distribution histogram
901 @param train_mask boolean numpy.array defining which events are training events
902 @param test_mask boolean numpy.array defining which events are test events
903 @param signal_mask boolean numpy.array defining which events are signal events
904 @param bckgrd_mask boolean numpy.array defining which events are background events
905 @param weight_column column in data containing the weights for each event
906 """
907 distribution = Distribution(self.figure, self.axis, normed_to_all_entries=True)
908
909 distribution.set_plot_options(self.plot_kwargs)
910 distribution.set_errorbar_options(self.errorbar_kwargs)
911 distribution.set_errorband_options(self.errorband_kwargs)
912 distribution.add(data, column, test_mask & signal_mask, weight_column)
913 distribution.add(data, column, test_mask & bckgrd_mask, weight_column)
914
915 distribution.set_plot_options(
916 {'color': distribution.plots[0][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
917 distribution.set_fill_options({'color': distribution.plots[0][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
918 distribution.set_errorbar_options(None)
919 distribution.set_errorband_options(None)
920 distribution.add(data, column, train_mask & signal_mask, weight_column)
921 distribution.set_plot_options(
922 {'color': distribution.plots[1][0][0].get_color(), 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
923 distribution.set_fill_options({'color': distribution.plots[1][0][0].get_color(), 'alpha': 0.5, 'step': 'post'})
924 distribution.add(data, column, train_mask & bckgrd_mask, weight_column)
925
926 distribution.labels = ['Test-Signal', 'Test-Background', 'Train-Signal', 'Train-Background']
927 distribution.finish()
928
929 self.plot_kwargs['color'] = distribution.plots[0][0][0].get_color()
930 difference_signal = Difference(self.figure, self.axis_d1, shift_to_zero=True, normed=True)
931 difference_signal.set_plot_options(self.plot_kwargs)
932 difference_signal.set_errorbar_options(self.errorbar_kwargs)
933 difference_signal.set_errorband_options(self.errorband_kwargs)
934 difference_signal.add(data, column, train_mask & signal_mask, test_mask & signal_mask, weight_column)
935 self.axis_d1.set_xlim((difference_signal.xmin, difference_signal.xmax))
936 self.axis_d1.set_ylim((difference_signal.ymin, difference_signal.ymax))
937 difference_signal.plots = difference_signal.labels = []
938 difference_signal.finish(line_color=distribution.plots[0][0][0].get_color())
939
940 self.plot_kwargs['color'] = distribution.plots[1][0][0].get_color()
941 difference_bckgrd = Difference(self.figure, self.axis_d2, shift_to_zero=True, normed=True)
942 difference_bckgrd.set_plot_options(self.plot_kwargs)
943 difference_bckgrd.set_errorbar_options(self.errorbar_kwargs)
944 difference_bckgrd.set_errorband_options(self.errorband_kwargs)
945 difference_bckgrd.add(data, column, train_mask & bckgrd_mask, test_mask & bckgrd_mask, weight_column)
946 self.axis_d2.set_xlim((difference_bckgrd.xmin, difference_bckgrd.xmax))
947 self.axis_d2.set_ylim((difference_bckgrd.ymin, difference_bckgrd.ymax))
948 difference_bckgrd.plots = difference_bckgrd.labels = []
949 difference_bckgrd.finish(line_color=distribution.plots[1][0][0].get_color())
950
951 try:
952 import scipy.stats
953 # Kolmogorov smirnov test
954 if len(data[column][train_mask & signal_mask]) == 0 or len(data[column][test_mask & signal_mask]) == 0:
955 b2.B2WARNING("Cannot calculate kolmogorov smirnov test for signal due to missing data")
956 else:
957 ks = scipy.stats.ks_2samp(data[column][train_mask & signal_mask], data[column][test_mask & signal_mask])
958 props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
959 self.axis_d1.text(0.1, 0.9, r'signal (train - test) difference $p={:.2f}$'.format(ks[1]), fontsize=36, bbox=props,
960 verticalalignment='top', horizontalalignment='left', transform=self.axis_d1.transAxes)
961 if len(data[column][train_mask & bckgrd_mask]) == 0 or len(data[column][test_mask & bckgrd_mask]) == 0:
962 b2.B2WARNING("Cannot calculate kolmogorov smirnov test for background due to missing data")
963 else:
964 ks = scipy.stats.ks_2samp(data[column][train_mask & bckgrd_mask], data[column][test_mask & bckgrd_mask])
965 props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
966 self.axis_d2.text(0.1, 0.9, r'background (train - test) difference $p={:.2f}$'.format(ks[1]), fontsize=36,
967 bbox=props,
968 verticalalignment='top', horizontalalignment='left', transform=self.axis_d2.transAxes)
969 except ImportError:
970 b2.B2WARNING("Cannot calculate kolmogorov smirnov test please install scipy!")
971
972 return self
973

◆ finish()

def finish (   self)
Sets the titles and x-axis labels of the three subplots and hides the x tick labels of the two upper subplots.

Reimplemented from Plotter.

Definition at line 974 of file plotting.py.

974 def finish(self):
975 """
976 Sets limits, title, axis-labels and legend of the plot
977 """
978 self.axis.set_title("Overtraining Plot")
979 self.axis_d1.set_title("")
980 self.axis_d2.set_title("")
981 matplotlib.artist.setp(self.axis.get_xticklabels(), visible=False)
982 matplotlib.artist.setp(self.axis_d1.get_xticklabels(), visible=False)
983 self.axis.get_xaxis().set_label_text('')
984 self.axis_d1.get_xaxis().set_label_text('')
985 self.axis_d2.get_xaxis().set_label_text('Classifier Output')
986 return self
987
988

Member Data Documentation

◆ axis [1/2]

None axis = None
static

Main axis which is used to draw.

Definition at line 867 of file plotting.py.

◆ axis [2/2]

axis

define first subplot

Definition at line 887 of file plotting.py.

◆ axis_d1 [1/2]

None axis_d1 = None
static

Axis which shows the difference between training and test signal.

Definition at line 869 of file plotting.py.

◆ axis_d1 [2/2]

axis_d1

define second subplot

Definition at line 889 of file plotting.py.

◆ axis_d2 [1/2]

None axis_d2 = None
static

Axis which shows the difference between training and test background.

Definition at line 871 of file plotting.py.

◆ axis_d2 [2/2]

axis_d2

define third subplot

Definition at line 891 of file plotting.py.

◆ figure [1/2]

None figure = None
static

figure which is used to draw

Definition at line 865 of file plotting.py.

◆ figure [2/2]

figure

create figure

Definition at line 880 of file plotting.py.


The documentation for this class was generated from the following file: