#!/usr/bin/env python3

# Belle II Software development
# hep_ml.py
11import numpy as np
12
13try:
14 import pandas
15except ImportError:
16 print("Please install pandas: pip3 install pandas")
17 import sys
18 sys.exit(1)
19
20try:
21 import hep_ml
22 import hep_ml.uboost
23except ImportError:
24 print("Please install hep_ml: pip3 install hep_ml")
25 import sys
26 sys.exit(1)
27
28import collections
29from basf2 import B2WARNING
30
31
32class State:
33 """
34 hep_ml state
35 """
36
37 def __init__(self, estimator=None):
38 """ Constructor of the state object """
39
40 self.estimator = estimator
41
42
43def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
44 """
45 Create hep_ml classifier and store it in a State object.
46 The features are used as train_features in uboost and the spectators are used as uniform_features.
47 You can pass additional parameters as a json-encoded string via m_config to the model.
48 I assume that the parameters are passed as a dictionary,
49 the key 'base_estimator' is passed to DecisionTreeClassifier as keyword arguments
50 other keys are passed to uBoostClassifier as keyword arguments
51 """
52 if isinstance(parameters, collections.abc.Mapping) and 'base_estimator' in parameters:
53 base_tree = hep_ml.uboost.DecisionTreeClassifier(**parameters['base_estimator'])
54 del parameters['base_estimator']
55 else:
56 base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)
57
58 train_features = list(range(number_of_features))
59 uniform_features = [number_of_features + i for i in range(number_of_spectators)]
60
61 if isinstance(parameters, collections.abc.Mapping):
62 if 'uniform_label' not in parameters:
63 parameters['uniform_label'] = [0, 1]
64 parameters['train_features'] = train_features
65 parameters['uniform_features'] = uniform_features
66 clf = hep_ml.uboost.uBoostClassifier(base_estimator=base_tree, **parameters)
67 else:
68 clf = hep_ml.uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=[0, 1],
69 base_estimator=base_tree, train_features=train_features)
70 return State(clf)
71
72
73def feature_importance(state):
74 """
75 Return a list containing the feature importances
76 """
77 return []
78
79
80def load(obj):
81 """
82 Load sklearn estimator into state
83 """
84 return State(obj)
85
86
87def apply(state, X):
88 """
89 Apply estimator to passed data.
90 If the estimator has a predict_proba it is called, otherwise call just predict.
91 """
92 X = pandas.DataFrame(X)
93 if hasattr(state.estimator, 'predict_proba'):
94 x = state.estimator.predict_proba(X)[:, 1]
95 else:
96 x = state.estimator.predict(X)
97 return np.require(x, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
98
99
100def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
101 """
102 Initialize lists which will store the received data
103 """
104 state.X = []
105 state.S = []
106 state.y = []
107 state.w = []
108 return state
109
110
111def partial_fit(state, X, S, y, w, epoch, batch):
112 """
113 Stores received training data.
114 HepML is usually not able to perform a partial fit.
115 """
116 if epoch > 0:
117 B2WARNING("The hep_ml training interface has been called with specific_options.m_nIterations > 1."
118 " This means duplicates of the training sample will be used during training.")
119
120 state.X.append(X)
121 state.S.append(S)
122 state.y.append(y.flatten())
123 state.w.append(w.flatten())
124 return True
125
126
127def end_fit(state):
128 """
129 Merge received data together and fit estimator
130 """
131 X = pandas.DataFrame(np.hstack([np.vstack(state.X), np.vstack(state.S)]))
132 state.estimator = state.estimator.fit(X, np.hstack(state.y), np.hstack(state.w))
133 return state.estimator
# (doxygen cross-reference residue, kept for traceability)
# estimator: pickable sklearn estimator — see State.__init__ above.