Belle II Software development
lightgbm.py
import numpy as np
import lightgbm as lgb
import os
import tempfile


class State(object):
    """
    LGBM state
    """

    def __init__(self, bst=None, params=None, X_valid=None, y_valid=None, path='LGBM.txt', trainFraction=0.8, num_round=100):
        """ Constructor of the state object """

        #: validation data features
        self.X_valid = X_valid

        #: validation data labels
        self.y_valid = y_valid

        #: file name under which the weights are saved
        self.path = path

        #: LightGBM model parameters
        self.params = params

        #: LightGBM booster
        self.bst = bst

        #: train fraction for dataset splitting
        self.trainFraction = trainFraction


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create and return a state object containing the model and other necessary functions
    """
    # default hyperparameters for LightGBM
    param = {'num_leaves': 31,
             'objective': 'regression',
             'learning_rate': 0.1,
             'device_type': "cpu",
             'deterministic': True,
             'metric': 'auc',
             'num_round': 100,
             'max_bin': 255,
             'boosting': "gbdt",
             }

    if isinstance(parameters, dict):
        # overwrite the defaults with any matching keys supplied by the user
        param = {key: parameters[key] if key in parameters else value for key, value in param.items()}
    state = State(params=param, path=str(parameters['path']), trainFraction=float(
        parameters['trainFraction']))  # ,stop_round = int(parameters['stop_round']))
    return state

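# Illustrative only: a typical `parameters` dict passed to get_model() might
# look like the following, where 'num_leaves' and 'learning_rate' are ordinary
# LightGBM options chosen here as an example:
#
#     {'path': 'LGBM.txt', 'trainFraction': 0.8, 'num_leaves': 63, 'learning_rate': 0.05}
#
# Keys that do not appear in the default dictionary above are ignored, while
# 'path' and 'trainFraction' are always required because get_model() reads
# them unconditionally.
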

def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Begin fit, do nothing
    """
    return state


def feature_importance(state):
    """
    Return a list containing the feature importances
    """
    return state.bst.feature_importance('gain').tolist()


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Full fitting process:
    1. randomly shuffle the data
    2. build the LGBM datasets
    3. run the training
    """
    # randomly split the sample into a training part and a validation part
    shuffled_indices = np.random.permutation(X.shape[0])
    split_index = int(X.shape[0] * state.trainFraction)

    state.train_set = lgb.Dataset(X[shuffled_indices[:split_index]],
                                  label=y[shuffled_indices[:split_index]],
                                  weight=w[shuffled_indices[:split_index]])

    state.validation_set = state.train_set.create_valid(
        X[shuffled_indices[split_index:]],
        label=y[shuffled_indices[split_index:]],
        weight=w[shuffled_indices[split_index:]])
    # do the training, evaluating on the held-out part only if one exists
    if (state.trainFraction != 1):
        state.bst = lgb.train(state.params, state.train_set, valid_sets=[state.validation_set])
    else:
        state.bst = lgb.train(state.params, state.train_set)
    del shuffled_indices
    return True

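# Note that the epoch and batch arguments of partial_fit() are not used here:
# the sample handed to this hook is shuffled, split and trained in a single
# call.  For example, with trainFraction=0.8 and 10000 candidates, 8000 events
# go into train_set and the remaining 2000 into validation_set.
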

def end_fit(state):
    """
    End the training process; the saved weights & hyperparameters are returned
    so that they can be stored in the ROOT file
    """
    with tempfile.TemporaryDirectory() as path:
        state.bst.save_model(os.path.join(path, state.path))
        file_names = [state.path]
        files = []
        for file_name in file_names:
            with open(os.path.join(path, file_name), 'rb') as file:
                files.append(file.read())
    params = state.params
    del state
    return [file_names, files, params]


def load(obj):
    """
    Load the trained model into a state object
    """
    with tempfile.TemporaryDirectory() as path:
        file_names = obj[0]
        for file_index, file_name in enumerate(file_names):
            with open(f'{path}/{file_name}', 'w+b') as file:
                file.write(bytes(obj[1][file_index]))

        # init the model from the reconstructed weight file
        bst = lgb.Booster(model_file=os.path.join(path, file_names[0]))
    state = State()
    state.bst = bst
    state.params = obj[2]
    return state


def apply(state, X):
    """
    Apply the model to the data and return the predictions
    """
    outputs = state.bst.predict(X)
    return np.require(outputs, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
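
The hooks above are normally driven by the Belle II MVA framework rather than called by hand, but they can be exercised standalone. The sketch below is illustrative only: it assumes the functions defined in this file are in scope (for example by appending it to this module, so that it does not clash with the lightgbm package name) and uses synthetic NumPy data and made-up parameter values.

    import numpy as np

    rng = np.random.default_rng(42)
    n_events, n_features = 1000, 5
    X = rng.normal(size=(n_events, n_features)).astype(np.float32)
    y = (X[:, 0] + 0.1 * rng.normal(size=n_events) > 0).astype(np.float32)
    w = np.ones(n_events, dtype=np.float32)

    # build the state; 'path' and 'trainFraction' are required, the rest are overrides
    state = get_model(n_features, 0, n_events, 0.8,
                      {'path': 'LGBM.txt', 'trainFraction': 0.8, 'objective': 'binary'})
    state = begin_fit(state, None, None, None, None, 1)
    partial_fit(state, X, None, y, w, epoch=0, batch=0)

    # serialise, reload and apply, mimicking the round trip through the ROOT file
    serialized = end_fit(state)
    new_state = load(serialized)
    print(feature_importance(new_state))
    print(apply(new_state, X)[:5])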