Belle II Software  release-08-01-10
preprocessing.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import numpy as np
12 
13 
15  """
16  This class provides a fast implementation of equal frequency binning.
17  In equal frequency binning, the bin boundaries are chosen so that every bin has the same number of entries.
18  An example with a Neural Network can be found in: mva/examples/keras/preprocessing.py
19  """
20 
def __init__(self, state=None):
    """
    Set up the binning state.

    :param state: a state dictionary obtained earlier (e.g. from
        ``export_state``) that should be restored.  It is stored as-is,
        without copying.  When ``None``, a fresh state with no binning
        boundaries and zero bins is created.
    """
    # NOTE(review): the attribute name ``statestate`` looks like it could be
    # an extraction artifact of ``state`` -- confirm against the upstream file.
    if state is not None:
        # Restore a previously saved state.
        self.statestate = state
        return

    # Nothing to restore: start with an empty, unfitted state.
    self.statestate = {'binning_array': [], 'number_of_bins': 0}
31 
def fit(self, x, number_of_bins=100):
    """
    Do the fitting -> calculate the binning boundaries.

    For every column of ``x`` the ``number_of_bins + 1`` equally spaced
    percentiles (0 % ... 100 %) are computed and stored as that column's
    bin boundaries.  Values are passed through ``np.nan_to_num`` first, so
    NaN entries do not propagate into the boundaries.

    :param x: two-dimensional array, indexed as ``x[:, variable]``.
    :param number_of_bins: number of bins to create per column.
    """
    # The percentile grid is the same for every column, so build it once.
    percentiles = np.linspace(0, 100, number_of_bins + 1)

    # Rebuild the boundary list from scratch.  Appending to whatever is
    # already stored (from an earlier fit or a restored state) would leave
    # the old boundaries at the front of the list, and apply() -- which
    # looks boundaries up by column index -- would keep using them.
    self.statestate['binning_array'] = [
        np.percentile(np.nan_to_num(x[:, variable]), percentiles)
        for variable in range(len(x[0, :]))
    ]
    self.statestate['number_of_bins'] = number_of_bins
40 
def apply(self, x):
    """
    Bin a dataset.

    Every column of ``x`` is overwritten in place with the index of the bin
    each value falls into, divided by the stored number of bins.  Only the
    inner boundaries are used for the lookup (the first and last stored
    boundary are dropped).  NaN values are replaced via ``np.nan_to_num``
    before the lookup.

    :param x: two-dimensional array, indexed as ``x[:, variable]``.
    :return: the same array object ``x``, now holding the binned values.
    """
    # NOTE(review): results are written back into x, so an integer-typed x
    # would truncate the fractional values -- confirm callers pass floats.
    column_count = len(x[0, :])
    for column in range(column_count):
        cleaned = np.nan_to_num(x[:, column])
        # Skip the outermost boundaries so values outside the fitted range
        # still land in the first or last bin.
        inner_edges = self.statestate['binning_array'][column][1:-1]
        bin_index = np.digitize(cleaned, inner_edges)
        x[:, column] = bin_index / self.statestate['number_of_bins']
    return x
49 
def export_state(self):
    """
    Return the state dictionary of this object (binning boundaries and
    number of bins), intended to be pickled into an mva weightfile.

    The stored dictionary itself is returned, not a copy.
    """
    current_state = self.statestate
    return current_state
def fit(self, x, number_of_bins=100)