Belle II Software  release-06-01-15
preprocessing.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import numpy as np
13 
14 
16  """
17  This class provides a fast implementation of equal frequency binning.
18  In Equal frequency binning the binning is chosen in a way that every bin has the same number of entries.
19  An example with a Neural Network can be found in: mva/examples/keras/preprocessing.py
20  """
21 
def __init__(self, state=None):
    """
    Set up the binning object.
    If a state was saved earlier and the class should be rebuilt from it, hand it over via the state parameter;
    without a state the object starts with no binning boundaries and zero bins.
    """
    # A fresh dictionary is created per instance, so unfitted instances never share a list.
    unfitted_state = {'binning_array': [], 'number_of_bins': 0}

    self.statestate = unfitted_state if state is None else state
32 
def fit(self, x, number_of_bins=100):
    """
    Do the fitting -> calculate binning boundaries

    For every column of x the number_of_bins + 1 percentiles between 0 and 100 are stored as
    bin boundaries. Values are passed through np.nan_to_num before the percentiles are taken.
    Each call replaces the boundaries of a previous fit instead of adding to them.

    x is indexed as x[row, column]; number_of_bins is the number of bins per column.
    """
    # The percentile levels do not depend on the column, so they are computed once.
    percentile_levels = np.linspace(0, 100, number_of_bins + 1)

    # Build a new list rather than appending to the stored one: with append, a second call
    # left the old boundaries in the leading slots, which is where apply() looks them up.
    self.statestate['binning_array'] = [
        np.percentile(np.nan_to_num(x[:, variable]), percentile_levels)
        for variable in range(len(x[0, :]))
    ]
    self.statestate['number_of_bins'] = number_of_bins
41 
def apply(self, x):
    """
    Bin a dataset

    Every column of x is overwritten in place with its bin index divided by the number of bins,
    and the same array object is returned. Values are passed through np.nan_to_num before binning.
    """
    for column in range(len(x[0, :])):
        cleaned = np.nan_to_num(x[:, column])
        # Only the inner boundaries are used: the first and the last one are dropped.
        inner_edges = self.statestate['binning_array'][column][1:-1]
        bin_indices = np.digitize(cleaned, inner_edges)
        x[:, column] = bin_indices / self.statestate['number_of_bins']
    return x
50 
def export_state(self):
    """
    Returns the state dictionary of the class, which is picklable and can therefore be saved in a mva weightfile.
    The dictionary itself is returned, not a copy.
    """
    current_state = self.statestate
    return current_state
def fit(self, x, number_of_bins=100)