# Belle II Software  release-05-01-25
# hep_ml.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2017
5 
6 import numpy as np
7 
8 try:
9  import pandas
10 except ImportError:
11  print("Please install pandas: pip3 install pandas")
12  import sys
13  sys.exit(1)
14 
15 try:
16  import hep_ml
17  import hep_ml.uboost
18 except ImportError:
19  print("Please install hep_ml: pip3 install hep_ml")
20  import sys
21  sys.exit(1)
22 
import collections
import collections.abc
24 
25 
class State(object):
    """
    Container passed between the basf2 mva framework calls, caching the
    hep_ml estimator.
    """

    def __init__(self, estimator=None):
        """
        Remember the (possibly still untrained) estimator.

        @param estimator picklable hep_ml/sklearn-like estimator, or None
        """
        #: Pickable sklearn estimator
        self.estimator = estimator
34 
35 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create hep_ml uBoostClassifier and store it in a State object.

    The features are used as train_features and the spectators are used as
    uniform_features of the uBoost classifier.  Additional parameters can be
    passed as a json-encoded dictionary via m_config:
      - the value under the key 'base_estimator' is forwarded as keyword
        arguments to hep_ml.uboost.DecisionTreeClassifier,
      - all remaining keys are forwarded as keyword arguments to
        hep_ml.uboost.uBoostClassifier.

    @param number_of_features number of feature variables
    @param number_of_spectators number of spectator variables
    @param number_of_events total number of training events (unused here)
    @param training_fraction fraction of events used for training (unused here)
    @param parameters optional mapping with the configuration described above
    @return State object containing the configured classifier
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc.  Work on a shallow copy so the caller's dictionary
    # is not mutated by the key deletions/insertions below.
    if isinstance(parameters, collections.abc.Mapping):
        parameters = dict(parameters)
    else:
        parameters = None

    if parameters is not None and 'base_estimator' in parameters:
        base_tree = hep_ml.uboost.DecisionTreeClassifier(**parameters.pop('base_estimator'))
    else:
        base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)

    # Column layout produced by end_fit: features first, then spectators.
    train_features = list(range(number_of_features))
    uniform_features = [number_of_features + i for i in range(number_of_spectators)]

    if parameters is not None:
        parameters.setdefault('uniform_label', [0, 1])
        parameters['train_features'] = train_features
        parameters['uniform_features'] = uniform_features
        clf = hep_ml.uboost.uBoostClassifier(base_estimator=base_tree, **parameters)
    else:
        clf = hep_ml.uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=[0, 1],
                                             base_estimator=base_tree, train_features=train_features)
    return State(clf)
64 
65 
def feature_importance(state):
    """
    Return a list containing the feature importances.

    uBoost does not provide per-feature importances, so the list is
    always empty.
    """
    return []
71 
72 
def load(obj):
    """
    Wrap an unpickled estimator in a fresh State object.

    @param obj picklable estimator restored from the weight file
    @return State holding the estimator
    """
    return State(obj)
78 
79 
def apply(state, X):
    """
    Apply the stored estimator to the passed data.

    The signal-probability column of predict_proba is used when the
    estimator provides it, otherwise plain predict is called.

    @param state State object with a fitted estimator
    @param X 2d array with the feature data
    @return contiguous, writeable 1d np.float32 array with the response
    """
    frame = pandas.DataFrame(X)
    predict_proba = getattr(state.estimator, 'predict_proba', None)
    if predict_proba is not None:
        response = predict_proba(frame)[:, 1]
    else:
        response = state.estimator.predict(frame)
    return np.require(response, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
91 
92 
def begin_fit(state, X, S, y, w):
    """
    Initialize empty buffers on the state which will collect the training
    data; the data arguments themselves are ignored here and arrive
    chunk-wise via partial_fit.

    @return the modified state
    """
    state.X, state.S, state.y, state.w = [], [], [], []
    return state
102 
103 
def partial_fit(state, X, S, y, w, epoch):
    """
    Buffer one chunk of training data on the state.

    hep_ml cannot train incrementally, so the chunks are only collected
    here and the actual fit happens in end_fit.

    @return True to signal that further data can be received
    """
    state.X.append(X)
    state.S.append(S)
    # Labels and weights are stored flattened so end_fit can hstack them.
    state.y.append(y.flatten())
    state.w.append(w.flatten())
    return True
114 
115 
def end_fit(state):
    """
    Concatenate the collected chunks and fit the estimator.

    Features and spectators are stacked side by side, matching the column
    indices (train_features / uniform_features) set up in get_model.

    @return the fitted estimator
    """
    features = np.vstack(state.X)
    spectators = np.vstack(state.S)
    frame = pandas.DataFrame(np.hstack((features, spectators)))
    labels = np.hstack(state.y)
    weights = np.hstack(state.w)
    state.estimator = state.estimator.fit(frame, labels, weights)
    return state.estimator
# --- doxygen cross-reference residue (kept for traceability) ---
# basf2_mva_python_interface.hep_ml.State               Definition: hep_ml.py:26
# basf2_mva_python_interface.hep_ml.State.estimator     Definition: hep_ml.py:33
#     Pickable sklearn estimator.
# basf2_mva_python_interface.hep_ml.State.__init__      Definition: hep_ml.py:30
#     def __init__(self, estimator=None)