Belle II Software development
preprocessing.py
1#!/usr/bin/env python3
2
3
10
11import numpy as np
12
13
15 """
16 This class provides a fast implementation of equal frequency binning.
17 In equal frequency binning the bin boundaries are chosen such that every bin contains the same number of entries.
18 An example with a Neural Network can be found in: mva/examples/keras/preprocessing.py
19 """
20
def __init__(self, state=None):
    """
    Set up the binning object.
    Pass a previously exported state to restore an earlier binning;
    without one, an empty state (no boundaries, zero bins) is created.
    """
    # The default dictionary is built here, per call, so that separate
    # instances never share the same 'binning_array' list.
    self.state = {'binning_array': [], 'number_of_bins': 0} if state is None else state
31
def fit(self, x, number_of_bins=100):
    """
    Do the fitting -> calculate binning boundaries

    For every column of x (indexed as x[:, variable]) the
    number_of_bins + 1 equally spaced percentiles between 0 and 100 are
    computed and stored as bin boundaries in the state. The values are
    passed through np.nan_to_num before the percentiles are taken.
    """
    # The percentile grid is the same for every variable, so build it once.
    percentiles = np.linspace(0, 100, number_of_bins + 1)

    # Start from a fresh list: boundaries are looked up by column index
    # later on, so appending behind the results of an earlier fit would
    # leave the stale boundaries in use.
    self.state['binning_array'] = [
        np.percentile(np.nan_to_num(x[:, variable]), percentiles)
        for variable in range(x.shape[1])
    ]
    self.state['number_of_bins'] = number_of_bins
40
def apply(self, x):
    """
    Bin a dataset

    Every column of x is replaced by its bin index divided by the number
    of bins. Only the inner boundaries (first and last one dropped) are
    handed to np.digitize. The values are passed through np.nan_to_num
    before they are binned.

    The array x is overwritten column by column and the same array object
    is returned.
    """
    # NOTE(review): the result is written back into x, so an integer-typed
    # x would truncate the fractional values on assignment -- presumably
    # callers pass float arrays; confirm.
    number_of_bins = self.state['number_of_bins']
    binning_array = self.state['binning_array']

    # x.shape[1] instead of len(x[0, :]) so that a batch without any rows
    # is passed through instead of raising an IndexError.
    for variable in range(x.shape[1]):
        inner_boundaries = binning_array[variable][1:-1]
        bin_index = np.digitize(np.nan_to_num(x[:, variable]), inner_boundaries)
        x[:, variable] = bin_index / number_of_bins
    return x
49
def export_state(self):
    """
    Hand out the state dictionary so that it can be pickled and saved
    in a mva weightfile. The stored dictionary itself is returned, not a copy.
    """
    current_state = self.state
    return current_state
def fit(self, x, number_of_bins=100)