13 def __init__(self, bst=None, params=None, X_valid=None, y_valid=None, path='LGBM.txt', trainFraction=0.8, num_round=100):
14 """ Constructor of the state object """
29def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
31 Create and return a state object containing the model
and other necessary functions
34 param = {
'num_leaves': 31,
35 'objective':
'regression',
38 'deterministic':
True,
45 if isinstance(parameters, dict):
46 param = {key: parameters[key]
if key
in param
else value
for key, value
in param.items()}
47 state =
State(params=param, path=str(parameters[
'path']), trainFraction=float(
48 parameters[
'trainFraction']))
52def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
def feature_importance(state):
    """
    Return a list containing the feature importances.

    The importances are read from the trained booster stored in
    ``state.bst`` using the 'gain' importance type, and the resulting
    array is converted to a plain Python list.
    """
    gain_importances = state.bst.feature_importance('gain')
    return gain_importances.tolist()
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Train the LightGBM booster on the received data.

    1. Randomly shuffle the row indices of X.
    2. Split the shuffled indices into a training part and a validation
       part according to state.trainFraction.
    3. Build the LightGBM training and validation datasets and store them
       on the state object.
    4. Train the booster and store it in state.bst. The validation set is
       only passed to the training call when state.trainFraction is not 1.

    The arguments S, epoch and batch are accepted for interface
    compatibility and are not used by the statements below.
    """
    n_rows = X.shape[0]
    shuffled_indices = np.random.permutation(n_rows)
    split_index = int(n_rows * state.trainFraction)

    # Slice the permutation once instead of repeating it for X, y and w.
    train_indices = shuffled_indices[:split_index]
    valid_indices = shuffled_indices[split_index:]

    state.train_set = lgb.Dataset(X[train_indices],
                                  label=y[train_indices],
                                  weight=w[train_indices])

    state.validation_set = state.train_set.create_valid(
        X[valid_indices], label=y[valid_indices], weight=w[valid_indices])

    if state.trainFraction != 1:
        state.bst = lgb.train(state.params, state.train_set, valid_sets=[state.validation_set])
    else:
        # Everything went into the training set, so there is nothing to validate on.
        state.bst = lgb.train(state.params, state.train_set)
    # NOTE(review): no return statement is visible for this function in the
    # available listing -- confirm what the calling framework expects here.
94 End training process and write weights & hyperparameters into root file
96 with tempfile.TemporaryDirectory()
as path:
97 state.bst.save_model(os.path.join(path, state.path))
98 file_names = [state.path]
100 for file_name
in file_names:
101 with open(os.path.join(path, file_name),
'rb')
as file:
102 files.append(file.read())
103 params = state.params
105 return [file_names, files, params]
110 Load the trained model into state
112 with tempfile.TemporaryDirectory()
as path:
114 for file_index, file_name
in enumerate(file_names):
115 with open(f
'{path}/{file_name}',
'w+b')
as file:
116 file.write(bytes(obj[1][file_index]))
118 bst = lgb.Booster(model_file=os.path.join(path, file_names[0]))
121 state.params = obj[2]
127 Apply model to data and make prediction
129 outputs = state.bst.predict(X)
130 return np.require(outputs, dtype=np.float32, requirements=[
'A',
'W',
'C',
'O'])
trainFraction
Fraction of the data used for training when the dataset is split into training and validation parts.
params
LightGBM model parameters.
def __init__(self, bst=None, params=None, X_valid=None, y_valid=None, path='LGBM.txt', trainFraction=0.8, num_round=100)