16 print(
"Please install pandas: pip3 install pandas")
24 print(
"Please install hep_ml: pip3 install hep_ml")
29 from basf2
import B2WARNING
38 """ Constructor of the state object """
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create hep_ml classifier and store it in a State object.
    The features are used as train_features in uboost and the spectators are used as uniform_features.
    You can pass additional parameters as a json-encoded string via m_config to the model.
    The parameters are expected to be a dictionary:
    the key 'base_estimator' is passed to DecisionTreeClassifier as keyword arguments,
    all other keys are passed to uBoostClassifier as keyword arguments.
    If the parameters are not a mapping, a DecisionTreeClassifier with max_depth=3 and
    uniform_label=[0, 1] are used.
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in collections.abc.
    from collections.abc import Mapping

    has_options = isinstance(parameters, Mapping)
    # Work on a private copy so the mapping handed in by the caller is not modified below.
    options = dict(parameters) if has_options else {}

    if 'base_estimator' in options:
        # The base-estimator settings must not be forwarded to uBoostClassifier, hence pop().
        base_tree = hep_ml.uboost.DecisionTreeClassifier(**options.pop('base_estimator'))
    else:
        base_tree = hep_ml.uboost.DecisionTreeClassifier(max_depth=3)

    # Features occupy the first columns, spectators are appended directly after them.
    train_features = list(range(number_of_features))
    uniform_features = [number_of_features + i for i in range(number_of_spectators)]

    if has_options:
        options.setdefault('uniform_label', [0, 1])
        # The column layout is fixed by this interface and overrides user-supplied values.
        options['train_features'] = train_features
        options['uniform_features'] = uniform_features
        clf = hep_ml.uboost.uBoostClassifier(base_estimator=base_tree, **options)
    else:
        clf = hep_ml.uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=[0, 1],
                                             base_estimator=base_tree, train_features=train_features)
    return State(clf)
73 def feature_importance(state):
75 Return a list containing the feature importances
82 Load sklearn estimator into state
89 Apply estimator to passed data.
90 If the estimator has a predict_proba it is called, otherwise call just predict.
92 X = pandas.DataFrame(X)
93 if hasattr(state.estimator,
'predict_proba'):
94 x = state.estimator.predict_proba(X)[:, 1]
96 x = state.estimator.predict(X)
97 return np.require(x, dtype=np.float32, requirements=[
'A',
'W',
'C',
'O'])
100 def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
102 Initialize lists which will store the received data
111 def partial_fit(state, X, S, y, w, epoch, batch):
113 Stores received training data.
114 HepML is usually not able to perform a partial fit.
117 B2WARNING(
"The hep_ml training interface has been called with specific_options.m_nIterations > 1."
118 " This means duplicates of the training sample will be used during training.")
122 state.y.append(y.flatten())
123 state.w.append(w.flatten())
129 Merge received data together and fit estimator
131 X = pandas.DataFrame(np.hstack([np.vstack(state.X), np.vstack(state.S)]))
132 state.estimator = state.estimator.fit(X, np.hstack(state.y), np.hstack(state.w))
133 return state.estimator
estimator
Picklable sklearn estimator.
def __init__(self, estimator=None)