# Belle II Software  release-06-01-15
# hep_ml.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import numpy as np
13 
14 try:
15  import pandas
16 except ImportError:
17  print("Please install pandas: pip3 install pandas")
18  import sys
19  sys.exit(1)
20 
21 try:
22  import hep_ml
23  import hep_ml.uboost
24 except ImportError:
25  print("Please install hep_ml: pip3 install hep_ml")
26  import sys
27  sys.exit(1)
28 
import collections
import collections.abc
30 
31 
class State(object):
    """
    hep_ml state: holds the pickable estimator between the framework calls.
    """

    def __init__(self, estimator=None):
        """
        Constructor of the state object.

        :param estimator: pickable hep_ml/sklearn-style estimator; read later
            by apply() (predict/predict_proba) and end_fit() (fit).
        """
        # Bug fix: the original assigned to 'self.estimatorestimator', so the
        # 'state.estimator' attribute used throughout this module was never set.
        self.estimator = estimator
40 
41 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create hep_ml uBoost classifier and store it in a State object.

    The features are used as train_features in uboost and the spectators are
    used as uniform_features.
    You can pass additional parameters as a json-encoded string via m_config
    to the model. If the parameters are passed as a dictionary, the key
    'base_estimator' is passed to DecisionTreeClassifier as keyword arguments
    and the other keys are passed to uBoostClassifier as keyword arguments.

    :param number_of_features: number of feature variables (become train_features)
    :param number_of_spectators: number of spectator variables (become uniform_features)
    :param number_of_events: total number of events (not used here)
    :param training_fraction: training fraction (not used here)
    :param parameters: optional mapping with the configuration described above
    :return: State object wrapping the configured uBoostClassifier
    """
    # Fix: collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc.  Also copy the mapping so the caller's dictionary is
    # not mutated by the del/insertions below.
    if isinstance(parameters, collections.abc.Mapping):
        parameters = dict(parameters)
        if 'base_estimator' in parameters:
            base_tree = hep_ml.uboost.DecisionTreeClassifier(**parameters.pop('base_estimator'))
        else:
            base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)
    else:
        base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)

    train_features = list(range(number_of_features))
    uniform_features = [number_of_features + i for i in range(number_of_spectators)]

    if isinstance(parameters, collections.abc.Mapping):
        parameters.setdefault('uniform_label', [0, 1])
        parameters['train_features'] = train_features
        parameters['uniform_features'] = uniform_features
        clf = hep_ml.uboost.uBoostClassifier(base_estimator=base_tree, **parameters)
    else:
        clf = hep_ml.uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=[0, 1],
                                             base_estimator=base_tree, train_features=train_features)
    return State(clf)
70 
71 
def feature_importance(state):
    """
    Return a list containing the feature importances.

    Feature importances are not exported for hep_ml models, hence the
    result is always the empty list.
    """
    return list()
77 
78 
def load(obj):
    """
    Wrap the unpickled estimator in a fresh State object and return it.
    """
    state = State(obj)
    return state
84 
85 
def apply(state, X):
    """
    Apply the stored estimator to the passed data.

    If the estimator offers predict_proba, the probability of the second
    class is returned; otherwise the plain predict output is used.  The
    result is delivered as an aligned, writeable, C-contiguous float32 array.
    """
    frame = pandas.DataFrame(X)
    estimator = state.estimator
    if hasattr(estimator, 'predict_proba'):
        output = estimator.predict_proba(frame)[:, 1]
    else:
        output = estimator.predict(frame)
    return np.require(output, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
97 
98 
def begin_fit(state, X, S, y, w):
    """
    Prepare the state for training: attach empty lists that will
    accumulate the data chunks received via partial_fit.
    """
    for attribute in ('X', 'S', 'y', 'w'):
        setattr(state, attribute, [])
    return state
108 
109 
def partial_fit(state, X, S, y, w, epoch):
    """
    Buffer the received chunk of training data.

    hep_ml is usually not able to perform a partial fit, so the actual
    fitting is deferred to end_fit; labels and weights are stored flattened.
    """
    for buffer, chunk in ((state.X, X),
                          (state.S, S),
                          (state.y, y.flatten()),
                          (state.w, w.flatten())):
        buffer.append(chunk)
    return True
120 
121 
def end_fit(state):
    """
    Merge all buffered chunks and fit the estimator.

    Features and spectators are stacked side by side (features first) into
    one DataFrame; labels and weights are concatenated.  Returns the fitted
    estimator.
    """
    features = np.vstack(state.X)
    spectators = np.vstack(state.S)
    merged = pandas.DataFrame(np.hstack((features, spectators)))
    targets = np.hstack(state.y)
    weights = np.hstack(state.w)
    state.estimator = state.estimator.fit(merged, targets, weights)
    return state.estimator
# NOTE(review): removed doxygen cross-reference residue that was appended to
# this file during documentation extraction — it was not valid Python.