Belle II Software light-2406-ragdoll
Overtraining Class Reference
Inheritance diagram for Overtraining:
Collaboration diagram for Overtraining:

Public Member Functions

def __init__ (self, figure=None)
 
def add (self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None)
 
def finish (self)
 

Public Attributes

 figure
 create figure
 
 axis
 define first subplot
 
 axis_d1
 define second subplot
 
 axis_d2
 define third subplot
 

Static Public Attributes

None figure = None
 figure which is used to draw
 
None axis = None
 Main axis which is used to draw.
 
None axis_d1 = None
 Axis which shows the difference between training and test signal.
 
None axis_d2 = None
 Axis which shows the difference between training and test background.
 

Detailed Description

Creates a TMVA-like overtraining control plot for a classification training.

Definition at line 858 of file plotting.py.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
  figure = None 
)
Creates a new figure if None is given, sets the default plot parameters
@param figure default draw figure which is used

Reimplemented from Plotter.

Definition at line 872 of file plotting.py.

def __init__(self, figure=None):
    """
    Set up the figure and the three vertically stacked axes of the overtraining plot.
    A new figure is created if None is given, otherwise the passed figure is used.
    @param figure default draw figure which is used
    """
    if figure is not None:
        #: figure which is used to draw
        self.figure = figure
    else:
        #: figure which is used to draw (newly created, 32x18 with tight layout enabled)
        self.figure = matplotlib.figure.Figure(figsize=(32, 18))
        self.figure.set_tight_layout(True)

    # Five grid rows in a single column: rows 0-2 hold the main plot,
    # rows 3 and 4 hold one difference plot each.
    grid = matplotlib.gridspec.GridSpec(5, 1)

    #: main axis (upper three grid rows)
    self.axis = self.figure.add_subplot(grid[:3, :])
    #: axis for the signal difference plot (fourth grid row), sharing x with the main axis
    self.axis_d1 = self.figure.add_subplot(grid[3, :], sharex=self.axis)
    #: axis for the background difference plot (fifth grid row), sharing x with the main axis
    self.axis_d2 = self.figure.add_subplot(grid[4, :], sharex=self.axis)

    # Hand the figure and the main axis to the base-class constructor.
    super().__init__(self.figure, self.axis)
893

Member Function Documentation

◆ add()

def add (   self,
  data,
  column,
  train_mask,
  test_mask,
  signal_mask,
  bckgrd_mask,
  weight_column = None 
)
Add a new overtraining plot. It is recommended to draw only one overtraining plot at a time;
otherwise there are too many curves in the plot to recognize anything.
@param data pandas.DataFrame containing all data
@param column which is used to calculate distribution histogram
@param train_mask boolean numpy.array defining which events are training events
@param test_mask boolean numpy.array defining which events are test events
@param signal_mask boolean numpy.array defining which events are signal events
@param bckgrd_mask boolean numpy.array defining which events are background events
@param weight_column column in data containing the weights for each event

Reimplemented from Plotter.

Definition at line 894 of file plotting.py.

def add(self, data, column, train_mask, test_mask, signal_mask, bckgrd_mask, weight_column=None):
    """
    Add a new overtraining plot. It is recommended to draw only one overtraining plot at a time,
    otherwise there are too many curves in the plot to recognize anything.

    The main axis receives the test and training distributions of signal and background,
    the two lower axes receive the (train - test) difference for signal and background.
    If scipy is available, the p-value of a two-sample Kolmogorov-Smirnov test is written
    into each difference axis. Note that this test is computed on the plain column values;
    weight_column is not taken into account for it.
    @param data pandas.DataFrame containing all data
    @param column which is used to calculate distribution histogram
    @param train_mask boolean numpy.array defining which events are training events
    @param test_mask boolean numpy.array defining which events are test events
    @param signal_mask boolean numpy.array defining which events are signal events
    @param bckgrd_mask boolean numpy.array defining which events are background events
    @param weight_column column in data containing the weights for each event
    @return self
    """
    distribution = Distribution(self.figure, self.axis, normed_to_all_entries=True)

    # The test samples are drawn with the currently configured options of this plotter.
    distribution.set_plot_options(self.plot_kwargs)
    distribution.set_errorbar_options(self.errorbar_kwargs)
    distribution.set_errorband_options(self.errorband_kwargs)
    distribution.add(data, column, test_mask & signal_mask, weight_column)
    distribution.add(data, column, test_mask & bckgrd_mask, weight_column)

    # Everything that follows reuses the colours of the two test curves, so that all
    # signal-related and all background-related artists match.
    signal_color = distribution.plots[0][0][0].get_color()
    bckgrd_color = distribution.plots[1][0][0].get_color()
    panels = (
        ('signal', signal_mask, signal_color, self.axis_d1),
        ('background', bckgrd_mask, bckgrd_color, self.axis_d2),
    )

    # The training samples are drawn as filled step histograms without errorbars or errorbands.
    distribution.set_errorbar_options(None)
    distribution.set_errorband_options(None)
    for _, class_mask, color, _ in panels:
        distribution.set_plot_options({'color': color, 'linestyle': '-', 'lw': 4, 'drawstyle': 'steps-mid'})
        distribution.set_fill_options({'color': color, 'alpha': 0.5, 'step': 'post'})
        distribution.add(data, column, train_mask & class_mask, weight_column)

    distribution.labels = ['Test-Signal', 'Test-Background', 'Train-Signal', 'Train-Background']
    distribution.finish()

    # One (train - test) difference plot per class on the lower axes.
    for _, class_mask, color, axis in panels:
        # Build a per-call copy of the plot options instead of writing the colour into
        # self.plot_kwargs: a colour stored on the instance would be handed to both test
        # curves by the next add() call. The "or {}" guards against options set to None.
        difference_kwargs = dict(self.plot_kwargs or {}, color=color)

        difference = Difference(self.figure, axis, shift_to_zero=True, normed=True)
        difference.set_plot_options(difference_kwargs)
        difference.set_errorbar_options(self.errorbar_kwargs)
        difference.set_errorband_options(self.errorband_kwargs)
        difference.add(data, column, train_mask & class_mask, test_mask & class_mask, weight_column)
        axis.set_xlim((difference.xmin, difference.xmax))
        axis.set_ylim((difference.ymin, difference.ymax))
        # Empty the recorded plots and labels before finishing
        # (presumably so that no legend is drawn on the difference axes - TODO confirm).
        difference.plots = difference.labels = []
        difference.finish(line_color=color)

    # scipy is optional: without it only the Kolmogorov-Smirnov annotation is skipped.
    try:
        import scipy.stats
    except ImportError:
        b2.B2WARNING("Cannot calculate kolmogorov smirnov test please install scipy!")
        return self

    # Kolmogorov smirnov test between training and test sample, separately for each class
    for name, class_mask, _, axis in panels:
        train_values = data[column][train_mask & class_mask]
        test_values = data[column][test_mask & class_mask]
        if len(train_values) == 0 or len(test_values) == 0:
            b2.B2WARNING("Cannot calculate kolmogorov smirnov test for " + name + " due to missing data")
            continue
        ks = scipy.stats.ks_2samp(train_values, test_values)
        props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
        # ks[1] is the p-value of the two-sample test
        axis.text(0.1, 0.9, name + r' (train - test) difference $p={:.2f}$'.format(ks[1]), fontsize=36, bbox=props,
                  verticalalignment='top', horizontalalignment='left', transform=axis.transAxes)

    return self
972

◆ finish()

def finish (   self)
Sets limits, title, axis-labels and legend of the plot

Reimplemented from Plotter.

Definition at line 973 of file plotting.py.

def finish(self):
    """
    Sets the titles, hides the x tick labels of the two upper axes and sets the x axis labels.
    @return self
    """
    # Only the main axis carries a title.
    for current_axis, title in ((self.axis, "Overtraining Plot"), (self.axis_d1, ""), (self.axis_d2, "")):
        current_axis.set_title(title)

    # The two upper axes get neither x tick labels nor an x axis label;
    # only the bottom axis is labelled.
    for upper_axis in (self.axis, self.axis_d1):
        matplotlib.artist.setp(upper_axis.get_xticklabels(), visible=False)
        upper_axis.get_xaxis().set_label_text('')
    self.axis_d2.get_xaxis().set_label_text('Classifier Output')

    return self
986
987

Member Data Documentation

◆ axis [1/2]

None axis = None
static

Main axis which is used to draw.

Definition at line 866 of file plotting.py.

◆ axis [2/2]

axis

define first subplot

Definition at line 886 of file plotting.py.

◆ axis_d1 [1/2]

None axis_d1 = None
static

Axis which shows the difference between training and test signal.

Definition at line 868 of file plotting.py.

◆ axis_d1 [2/2]

axis_d1

define second subplot

Definition at line 888 of file plotting.py.

◆ axis_d2 [1/2]

None axis_d2 = None
static

Axis which shows the difference between training and test background.

Definition at line 870 of file plotting.py.

◆ axis_d2 [2/2]

axis_d2

define third subplot

Definition at line 890 of file plotting.py.

◆ figure [1/2]

None figure = None
static

figure which is used to draw

Definition at line 864 of file plotting.py.

◆ figure [2/2]

figure

create figure

Definition at line 879 of file plotting.py.


The documentation for this class was generated from the following file: