Belle II Software  release-05-01-25
preprocessing.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Dennis Weyland 2017
5 
6 import numpy as np
7 
8 
class fast_equal_frequency_binning:
    """
    This class provides a fast implementation of equal frequency binning.
    In equal frequency binning the bin boundaries are chosen in a way that every bin
    has the same number of entries.
    An example with a Neural Network can be found in: mva/examples/keras/preprocessing.py

    Typical usage: call fit() once on a 2D array (rows = entries, columns = variables),
    then apply() on arrays with the same column layout. export_state() returns the
    dictionary which can be handed to the constructor to rebuild the object later.
    """

    def __init__(self, state=None):
        """
        Init the class.
        If you saved a state before and want to rebuild the class, use the state parameter.

        @param state dictionary as returned by export_state(), or None for an unfitted object.
                     The dictionary is stored by reference, not copied.
        """
        if state is None:
            # State of the class: one array of bin boundaries per variable,
            # plus the number of bins the boundaries were computed for.
            self.state = {'binning_array': [], 'number_of_bins': 0}
        else:
            self.state = state

    def fit(self, x, number_of_bins=100):
        """
        Do the fitting -> calculate binning boundaries.

        For every column of x the boundaries are the percentiles at number_of_bins + 1
        equally spaced points between 0 and 100. NaN entries are replaced via
        np.nan_to_num before the percentiles are computed.
        Boundaries from a previous fit are discarded.

        @param x 2D numpy array, indexed as x[entry, variable]
        @param number_of_bins number of bins per variable, must be >= 1
        @throws ValueError if number_of_bins < 1 or x has no rows
        """
        if number_of_bins < 1:
            raise ValueError("number_of_bins must be at least 1, got {}".format(number_of_bins))

        number_of_entries, number_of_variables = x.shape[0], x.shape[1]
        if number_of_entries == 0:
            raise ValueError("cannot calculate binning boundaries from an empty dataset")

        # The percentile positions are identical for every variable, compute them once.
        percentiles = np.linspace(0, 100, number_of_bins + 1)

        # Build a fresh list instead of appending: appending would keep the boundaries
        # of an earlier fit in front, and apply() would silently keep using those.
        self.state['binning_array'] = [
            np.percentile(np.nan_to_num(x[:, variable]), percentiles)
            for variable in range(number_of_variables)
        ]
        self.state['number_of_bins'] = number_of_bins

    def apply(self, x):
        """
        Bin a dataset.

        Every column of x is overwritten IN PLACE with bin_index / number_of_bins, where
        bin_index is in [0, number_of_bins - 1]. NaN entries are replaced via
        np.nan_to_num before binning. Values below the first or above the last fitted
        boundary end up in the first or last bin, because only the inner boundaries
        are passed to np.digitize.

        Because the result is written back into x, x should have a floating point dtype.

        @param x 2D numpy array, indexed as x[entry, variable], with the column layout used in fit()
        @return the same array object x, now containing the binned values
        """
        number_of_bins = self.state['number_of_bins']
        # Take the column count from the shape so that an array without rows passes through.
        for variable in range(x.shape[1]):
            inner_boundaries = self.state['binning_array'][variable][1:-1]
            x[:, variable] = np.digitize(np.nan_to_num(x[:, variable]), inner_boundaries) / number_of_bins
        return x

    def export_state(self):
        """
        Returns a picklable dictionary to save the state of the class in a mva weightfile.
        The returned dictionary is the internal state object itself, not a copy.
        """
        return self.state
preprocessing.fast_equal_frequency_binning.apply
def apply(self, x)
Definition: preprocessing.py:36
preprocessing.fast_equal_frequency_binning.__init__
def __init__(self, state=None)
Definition: preprocessing.py:16
preprocessing.fast_equal_frequency_binning.state
state
State of the class.
Definition: preprocessing.py:23
preprocessing.fast_equal_frequency_binning.export_state
def export_state(self)
Definition: preprocessing.py:45
preprocessing.fast_equal_frequency_binning
Definition: preprocessing.py:9
preprocessing.fast_equal_frequency_binning.fit
def fit(self, x, number_of_bins=100)
Definition: preprocessing.py:27