# Belle II Software  release-08-01-10
# hep_ml.py
#!/usr/bin/env python3
import numpy as np

# pandas and hep_ml are required by this interface: abort with an
# installation hint instead of a raw traceback if they are missing.
try:
    import pandas
except ImportError:
    print("Please install pandas: pip3 install pandas")
    import sys
    sys.exit(1)

try:
    import hep_ml
    import hep_ml.uboost
except ImportError:
    print("Please install hep_ml: pip3 install hep_ml")
    import sys
    sys.exit(1)
27 
import collections
import collections.abc

from basf2 import B2WARNING
30 
31 
class State:
    """
    State object that carries the hep_ml estimator between the
    fitting and inference stages of the MVA interface.
    """

    def __init__(self, estimator=None):
        """
        Constructor of the state object.

        :param estimator: hep_ml estimator (e.g. uBoostClassifier) to store,
            or None if it is attached later.
        """
        # Fix: the original assigned to 'self.estimatorestimator', leaving
        # 'self.estimator' undefined and breaking apply()/end_fit(), which
        # both read state.estimator.
        self.estimator = estimator
41 
42 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create a hep_ml uBoostClassifier and store it in a State object.

    The features are used as train_features and the spectators are used as
    uniform_features of uBoost.  Additional parameters can be passed as a
    json-encoded dictionary via m_config:

      - the value of the key 'base_estimator' is passed as keyword arguments
        to DecisionTreeClassifier,
      - all other keys are passed as keyword arguments to uBoostClassifier.

    :param number_of_features: number of feature variables
    :param number_of_spectators: number of spectator variables
    :param number_of_events: total number of training events (unused)
    :param training_fraction: training fraction (unused)
    :param parameters: mapping with additional parameters; anything that is
        not a mapping is ignored and defaults are used
    :return: State object holding the untrained classifier
    """
    train_features = list(range(number_of_features))
    uniform_features = [number_of_features + i for i in range(number_of_spectators)]

    # Fix: collections.Mapping was removed in Python 3.10; the abstract base
    # class lives in collections.abc.
    if isinstance(parameters, collections.abc.Mapping):
        # Work on a copy so the caller's dictionary is not mutated
        # (the original deleted 'base_estimator' from the caller's dict).
        parameters = dict(parameters)
        base_tree = hep_ml.uboost.DecisionTreeClassifier(**parameters.pop('base_estimator', {'max_depth': 3}))
        parameters.setdefault('uniform_label', [0, 1])
        parameters['train_features'] = train_features
        parameters['uniform_features'] = uniform_features
        clf = hep_ml.uboost.uBoostClassifier(base_estimator=base_tree, **parameters)
    else:
        base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)
        clf = hep_ml.uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=[0, 1],
                                             base_estimator=base_tree, train_features=train_features)
    return State(clf)
71 
72 
def feature_importance(state):
    """
    Return the feature importances of the trained estimator.

    hep_ml does not expose this information, so an empty list is returned.
    """
    return []
78 
79 
def load(obj):
    """
    Wrap the unpickled hep_ml estimator in a fresh State object.

    :param obj: estimator restored from the weight file
    :return: State holding the estimator
    """
    return State(obj)
85 
86 
def apply(state, X):
    """
    Apply the stored estimator to the passed data.

    Uses predict_proba (probability of the second class) when the estimator
    provides it, otherwise falls back to plain predict.

    :param state: State object holding the trained estimator
    :param X: 2d array of feature values
    :return: 1d numpy array of float32 responses
    """
    frame = pandas.DataFrame(X)
    estimator = state.estimator
    predict_proba = getattr(estimator, 'predict_proba', None)
    if predict_proba is None:
        response = estimator.predict(frame)
    else:
        response = predict_proba(frame)[:, 1]
    return np.require(response, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
98 
99 
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Prepare empty containers on the state which collect the training data
    delivered batch-wise through partial_fit.

    The test sample and the number of batches are ignored, because hep_ml
    is fitted in one go on the merged data in end_fit.
    """
    for attribute in ('X', 'S', 'y', 'w'):
        setattr(state, attribute, [])
    return state
109 
110 
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Store the received batch of training data on the state.

    hep_ml cannot be trained incrementally, therefore the data is only
    collected here and the actual fit happens in end_fit.
    """
    if epoch > 0:
        B2WARNING("The hep_ml training interface has been called with specific_options.m_nIterations > 1."
                  " This means duplicates of the training sample will be used during training.")

    state.X.append(X)
    state.S.append(S)
    for container, values in ((state.y, y), (state.w, w)):
        container.append(values.flatten())
    return True
125 
126 
def end_fit(state):
    """
    Merge the collected batches into one sample and fit the estimator.

    Features and spectators are stacked side by side, because uBoost uses
    the spectator columns as uniform_features (see get_model).

    :return: the fitted estimator, which gets pickled into the weight file
    """
    features = np.vstack(state.X)
    spectators = np.vstack(state.S)
    data = pandas.DataFrame(np.hstack([features, spectators]))
    labels = np.hstack(state.y)
    weights = np.hstack(state.w)
    state.estimator = state.estimator.fit(data, labels, weights)
    return state.estimator
# (documentation-generator cross-reference artifacts removed:
#  estimator / __init__ definition links from hep_ml.py)