13 def __init__(self, bst=None, params=None, X_valid=None, y_valid=None, path='LGBM.txt', trainFraction=0.8, num_round=100):
14 """ Constructor of the state object """
29def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
31 Create and return a state object containing the model
and other necessary functions
34 param = {
'num_leaves': 31,
35 'objective':
'regression',
38 'deterministic':
True,
46 if isinstance(parameters, dict):
47 param = {key: parameters[key]
if key
in param
else value
for key, value
in param.items()}
48 state =
State(params=param, path=str(parameters[
'path']), trainFraction=float(
49 parameters[
'trainFraction']))
53def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
def feature_importance(state):
    """
    Return a list containing the feature importances.

    The importances are read from the trained booster stored in ``state.bst``
    using the 'gain' importance type and converted to a plain Python list.
    """
    importances = state.bst.feature_importance(importance_type='gain')
    return importances.tolist()
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Train a LightGBM booster on the received data and store it in the state.

    Steps:
    1. randomly shuffle the sample indices
    2. use the first ``state.trainFraction`` share of the shuffled samples as
       training set and the remaining samples as validation set
    3. train with ``state.params``; the validation set is passed to the
       training only if ``state.trainFraction`` differs from 1

    Arguments:
    state -- object providing ``params`` and ``trainFraction``; its
             ``train_set``, ``validation_set`` and ``bst`` attributes are
             overwritten by this call
    X     -- feature array, indexed by sample along the first axis
    y     -- target values, one per sample
    w     -- sample weights, one per sample
    S, epoch, batch -- accepted for interface compatibility, not used here
    """
    n_samples = X.shape[0]
    shuffled_indices = np.random.permutation(n_samples)
    split_index = int(n_samples * state.trainFraction)

    # Slice the permutation once instead of repeating it for X, y and w.
    train_indices = shuffled_indices[:split_index]
    valid_indices = shuffled_indices[split_index:]

    state.train_set = lgb.Dataset(X[train_indices],
                                  label=y[train_indices],
                                  weight=w[train_indices])

    # create_valid ties the validation data to the training Dataset.
    state.validation_set = state.train_set.create_valid(
        X[valid_indices], label=y[valid_indices], weight=w[valid_indices])

    # Exactly one training run per call: with a hold-out share the validation
    # set is monitored, otherwise all data is used for training only.
    if state.trainFraction != 1:
        state.bst = lgb.train(state.params, state.train_set, valid_sets=[state.validation_set])
    else:
        state.bst = lgb.train(state.params, state.train_set)
95 End training process and write weights & hyperparameters into root file
97 with tempfile.TemporaryDirectory()
as path:
98 state.bst.save_model(os.path.join(path, state.path))
99 file_names = [state.path]
101 for file_name
in file_names:
102 with open(os.path.join(path, file_name),
'rb')
as file:
103 files.append(file.read())
104 params = state.params
106 return [file_names, files, params]
111 Load the trained model into state
113 with tempfile.TemporaryDirectory()
as path:
115 for file_index, file_name
in enumerate(file_names):
116 with open(f
'{path}/{file_name}',
'w+b')
as file:
117 file.write(bytes(obj[1][file_index]))
119 bst = lgb.Booster(model_file=os.path.join(path, file_names[0]))
122 state.params = obj[2]
128 Apply model to data and make prediction
130 outputs = state.bst.predict(X)
131 return np.require(outputs, dtype=np.float32, requirements=[
'A',
'W',
'C',
'O'])
trainFraction
Fraction of the samples used for training; the remaining samples form the validation set.
params
LightGBM model parameters (the dictionary passed to lgb.train).
def __init__(self, bst=None, params=None, X_valid=None, y_valid=None, path='LGBM.txt', trainFraction=0.8, num_round=100)