Belle II Software  light-2212-foldex
xgboost.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import numpy as np
13 
14 try:
15  import xgboost as xgb
16 except ImportError:
17  print("Please install xgboost: pip3 install xgboost")
18  import sys
19  sys.exit(1)
20 
import collections
import collections.abc
import os
import tempfile
24 
25 
class State(object):
    """
    XGBoost state
    """

    def __init__(self, num_round=0, parameters=None):
        """ Constructor of the state object """

        #: Parameters passed to xgboost model
        self.parameters = parameters

        #: Number of boosting rounds used in xgboost training
        self.num_round = num_round

        #: XGBoost estimator (set by load() or end_fit())
        self.estimator = None
39 
40 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Return default xgboost model, configured with the user-supplied parameters.

    @param parameters: optional mapping of xgboost parameters; the special key
        'nTrees' sets the number of boosting rounds and is removed before the
        remaining entries are merged into the default parameter dict.
    @return: State holding the number of boosting rounds and the parameter dict
    """
    param = {'bst:max_depth': 2, 'bst:eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    nTrees = 100
    # collections.Mapping was removed in Python 3.10; use collections.abc.
    # Checking the mapping first also avoids a TypeError when parameters is None.
    if isinstance(parameters, collections.abc.Mapping):
        if 'nTrees' in parameters:
            nTrees = parameters['nTrees']
            del parameters['nTrees']
        param.update(parameters)
    return State(nTrees, param)
53 
54 
def feature_importance(state):
    """
    Return a list containing the feature importances
    """
    # Feature importances are not exported through this interface.
    return []
60 
61 
def load(obj):
    """
    Load XGBoost estimator into state.

    @param obj: raw bytes of a serialized xgboost model (as returned by end_fit)
    @return: State with the deserialized Booster in state.estimator
    """
    state = State()
    # Booster.load_model only reads from a path, so round-trip the
    # serialized bytes through a temporary file.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(obj)
    try:
        state.estimator = xgb.Booster({})
        state.estimator.load_model(tmp.name)
    finally:
        # Remove the temporary file even if deserialization fails,
        # so a broken model payload does not leak files.
        os.unlink(tmp.name)
    return state
74 
75 
def apply(state, X):
    """
    Apply estimator to passed data.
    """
    prediction = state.estimator.predict(xgb.DMatrix(X))
    # The MVA framework expects an aligned, writeable, C-contiguous float32 array.
    return np.require(prediction, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
83 
84 
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Initialize lists which will store the received data
    """
    # Training data is accumulated batch by batch in partial_fit.
    state.X, state.y, state.w = [], [], []
    # Validation sample is kept as-is; labels and weights are flattened to 1-D.
    state.Xtest = Xtest
    state.ytest = ytest.flatten()
    state.wtest = wtest.flatten()
    return state
96 
97 
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Stores received training data.
    XGBoost is usually not able to perform a partial fit.
    """
    # Labels and weights are flattened to 1-D before being buffered;
    # everything is merged and fitted once in end_fit.
    for buffer, chunk in ((state.X, X), (state.y, y.flatten()), (state.w, w.flatten())):
        buffer.append(chunk)
    return True
107 
108 
def end_fit(state):
    """
    Merge received data together and fit estimator
    """
    # Stack all buffered batches into one training DMatrix.
    dtrain = xgb.DMatrix(np.vstack(state.X),
                         label=np.hstack(state.y).astype(int),
                         weight=np.hstack(state.w))

    # Evaluate on the validation sample as well, when one was provided.
    evallist = [(dtrain, 'train')]
    if len(state.Xtest) > 0:
        dtest = xgb.DMatrix(state.Xtest, label=state.ytest.astype(int), weight=state.wtest)
        evallist.insert(0, (dtest, 'eval'))

    state.estimator = xgb.train(state.parameters, dtrain, state.num_round, evallist)

    # Serialize the trained booster: save_model only writes to a path,
    # so dump to a temporary file and read the raw bytes back.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp.close()
    state.estimator.save_model(tmp.name)
    with open(tmp.name, 'rb') as saved:
        content = saved.read()
    os.unlink(tmp.name)
    return content
Attribute documentation for State (defined in __init__):
  num_round  -- number of boosting rounds used in xgboost training
  parameters -- parameters passed to the xgboost model