Belle II Software development
ksSelector.py
1#!/usr/bin/env python3
2
9
10from basf2 import B2INFO, B2ERROR
11import basf2_mva
12import modularAnalysis as ma
13import variables
14from variables import utils
15
16
def add_default_ks_Selector_aliases():
    """
    Register the variable aliases used by the ks Selector.

    Each alias maps a short name onto a variable-manager expression; the
    aliases are registered one after another in the order listed below.
    """
    B2INFO('KsSelector: creating variables alias.')

    # (alias name, aliased expression) pairs, kept in registration order.
    alias_table = (
        ('M_lambda_p', 'useAlternativeDaughterHypothesis(M, 0:p+)'),
        ('M_lambda_antip', 'useAlternativeDaughterHypothesis(M, 1:anti-p-)'),
        ('daughtersDeltaZ', 'daughterDiffOf(0, 1, dz)'),
        ('cosVertexMomentum', 'cosAngleBetweenMomentumAndVertexVector'),
        ('pip_nPXDHits', 'daughter(0,nPXDHits)'),
        ('pin_nPXDHits', 'daughter(1,nPXDHits)'),
        ('pip_nSVDHits', 'daughter(0,nSVDHits)'),
        ('pin_nSVDHits', 'daughter(1,nSVDHits)'),
        ('daughterAngleDiffInMother', 'useRestFrame(daughterAngle(0, 1))'),
        ('pip_p', 'daughter(0,p)'),
        ('pin_p', 'daughter(1,p)'),
        ('pip_dr', 'daughter(0,dr)'),
        ('pin_dr', 'daughter(1,dr)'),
        ('pip_cosTheta', 'daughter(0,cosTheta)'),
        ('pin_cosTheta', 'daughter(1,cosTheta)'),
        ('pip_protonID', 'daughter(0,protonID)'),
        ('pin_protonID', 'daughter(1,protonID)'),
    )
    for alias_name, expression in alias_table:
        variables.variables.addAlias(alias_name, expression)
39
40
def add_variable_collection():
    """
    Register the variable collection 'ks_selector_info' for ksSelector.

    The ks Selector aliases are created first, because several entries of the
    collection are alias names rather than plain variables.
    """
    add_default_ks_Selector_aliases()

    # Quantities of the Ks candidate itself.
    candidate_vars = [
        'cosVertexMomentum',
        'flightDistance',
        'significanceOfDistance',
        'cosHelicityAngleMomentum',
        'ImpactXY',
        'decayAngle(0)',
        'decayAngle(1)',
        'daughterAngleDiffInMother',
        'daughtersDeltaZ',
    ]
    # Per-daughter quantities and alternative-hypothesis masses.
    daughter_vars = [
        'pip_nSVDHits', 'pip_nPXDHits',
        'pin_nSVDHits', 'pin_nPXDHits',
        'pip_dr', 'pin_dr',
        'pip_protonID', 'pin_protonID',
        'M_lambda_p', 'M_lambda_antip',
        'pip_p', 'pin_p',
        'pip_cosTheta', 'pin_cosTheta',
    ]
    armenteros_vars = [
        'ArmenterosLongitudinalMomentumAsymmetry',
        'ArmenterosDaughter1Qt',
        'ArmenterosDaughter2Qt',
    ]

    collection = candidate_vars + daughter_vars + armenteros_vars
    utils.add_collection(collection, 'ks_selector_info')
68
69
def LightGBM2ONNX(dumping_file_name,
                  output_file_name,
                  input_variable_list
                  ):
    """
    Convert trained LightGBM payload to onnx format. Only work for (binary or regression) loss

    The LightGBM weightfile is read from ``dumping_file_name``, the booster it
    contains is converted with ``onnxmltools`` and the result is wrapped into
    an ONNX MVA weightfile that is saved as ``output_file_name``.

    Intermediate files ("Python_Weightfile.pkl" and "temp_model.onnx") are
    written to the current working directory and are removed again, also when
    the conversion fails part-way.

    @param dumping_file_name    file name for trained LightGBM payload
    @param output_file_name     file name for output onnx payload
    @param input_variable_list  model input variables list

    """
    import base64
    import contextlib
    import os
    import pickle

    import ROOT
    # NOTE(review): the function uses `basf2_mva_python_interface.lightgbm`, but no
    # import of it is visible in this file; imported locally here - confirm that
    # this matches the intended module.
    import basf2_mva_python_interface.lightgbm

    lightgbm_interface = basf2_mva_python_interface.lightgbm
    pickled_weightfile_name = "Python_Weightfile.pkl"
    onnx_file_name = "temp_model.onnx"

    with ROOT.TFile(dumping_file_name) as f:
        w = ROOT.Belle2.MVA.Weightfile.loadFromStream(ROOT.stringstream(f.Get("Weightfile").m_data))

    # SECURITY NOTE: both the steering code (unpickled and exec'd) and the
    # pickled weightfile content come straight from the input file. Only run
    # this on weightfiles from a trusted source.
    # "====" is appended so that the base64 decoder never fails on missing padding.
    steering_source = pickle.loads(
        base64.b64decode(w.getElement["std::string"]("Python_Steeringfile") + "===="))
    # Execute the steering code in the namespace of the lightgbm interface module
    # (no need to round-trip it through a temporary script on disk).
    exec(steering_source, lightgbm_interface.__dict__)

    try:
        w.getFile("Python_Weightfile", pickled_weightfile_name)
        with open(pickled_weightfile_name, "rb") as f:
            obj = pickle.load(f)

        # NOTE(review): the visible source reads `state.bst` without defining
        # `state`; rebuilding it through the interface module's `load` hook is an
        # assumption - confirm against the original file.
        state = lightgbm_interface.load(obj)
        model = state.bst

        from onnxmltools import convert_lightgbm
        from onnxmltools.convert.common.data_types import FloatTensorType

        # One float input tensor with a free batch dimension and one column per variable.
        num_features = len(input_variable_list)
        initial_type = [('input', FloatTensorType([None, num_features]))]

        onnx_model = convert_lightgbm(model, initial_types=initial_type)

        with open(onnx_file_name, "wb") as f:
            f.write(onnx_model.SerializeToString())

        from basf2_mva_util import create_onnx_mva_weightfile
        weightfile = create_onnx_mva_weightfile(
            onnx_file_name,
            variables=input_variable_list,
            target_variable="isSignal",
        )
        weightfile.save(output_file_name)
    finally:
        # Always clean up the intermediate files, whichever of them were created.
        for temp_file_name in (onnx_file_name, pickled_weightfile_name):
            with contextlib.suppress(FileNotFoundError):
                os.remove(temp_file_name)
124
125
def V0Selector_Training(
    train_data,
    tree_name="tree",
    mva_identifier="MVAFastBDT_V0Selector.root",
    target_variable="isSignal",
    parameters=None,
    options=None
):
    """
    Defines the configuration of V0Selector Training.
    The training data should contain K_S0 and misreconstructed K_S0 without Lambda0.

    @param train_data           Root file containing Ks information to be trained.
    @param tree_name            Tree name for variables.
    @param mva_identifier       Name for output MVA weight file.
    @param target_variable      Target variable for MVA training.
    @param parameters           hyperparameter for LGBM. A dict whose entries override the
                                defaults defined in this function; anything that is not a
                                dict (including the default None) is ignored.
    @param options              MVA options. Only the key 'max_events' is read
                                (0 is used when it is absent).
    """
    import json

    # None instead of a mutable {} default: avoids sharing one dict between calls.
    if options is None:
        options = {}

    trainVars = [
        'cosVertexMomentum',
        'flightDistance',
        'significanceOfDistance',
        'cosHelicityAngleMomentum',
        'ImpactXY',
        'decayAngle(0)',
        'decayAngle(1)',
        'daughterAngleDiffInMother',
        'daughtersDeltaZ',
        'pip_nSVDHits', 'pin_nSVDHits',
        'pip_dr', 'pin_dr',
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector(train_data)
    general_options.m_treename = tree_name
    general_options.m_identifier = mva_identifier
    general_options.m_variables = basf2_mva.vector(*trainVars)
    general_options.m_target_variable = target_variable
    general_options.m_max_events = options.get('max_events', 0)

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "lightgbm"

    # Default hyperparameters; user supplied entries take precedence.
    param = {'num_leaves': 256,
             'learning_rate': 0.1,
             'device_type': "cpu",
             'deterministic': True,
             'metric': 'auc',
             'num_round': 100,
             # 'stop_round' : 30,
             'path': mva_identifier+'.txt',
             'max_bin': 250,
             'boosting': 'gbdt',
             'trainFraction': 0.8,
             'min_data_in_leaf': 4000,
             'max_depth': 8,
             'objective': 'cross_entropy',
             'num_threads': 1
             }
    if isinstance(parameters, dict):
        param.update(parameters)
    config_string = json.dumps(param)
    print("The json config string", config_string)
    python_options.m_config = config_string

    python_options.m_training_fraction = 1
    # NOTE(review): the original comment said "we do it inside MVA torch" although
    # the framework selected above is lightgbm - confirm no normalisation is wanted.
    python_options.m_normalize = False
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0

    basf2_mva.teacher(general_options, python_options)
199
200
def LambdaVeto_Training(
    train_data,
    tree_name="tree",
    mva_identifier="MVAFastBDT_LambdaVeto.root",
    target_variable="isSignal",
    parameters=None,
    options=None
):
    """
    Defines the configuration of LambdaVeto Training.
    The training data should contain only K_S0 and Lambda0.

    @param train_data           Root file containing Ks information to be trained.
    @param tree_name            Tree name for variables.
    @param mva_identifier       Name for output MVA weight file.
    @param target_variable      Target variable for MVA training.
    @param parameters           hyperparameter for LGBM. A dict whose entries override the
                                defaults defined in this function; anything that is not a
                                dict (including the default None) is ignored.
    @param options              MVA options. Only the key 'max_events' is read
                                (0 is used when it is absent).
    """
    import json

    # None instead of a mutable {} default: avoids sharing one dict between calls.
    if options is None:
        options = {}

    trainVars = [
        'pip_protonID',
        'pin_protonID',
        'M_lambda_p',
        'M_lambda_antip',
        'pip_cosTheta',
        'pin_cosTheta',
        'ArmenterosLongitudinalMomentumAsymmetry',
        'ArmenterosDaughter1Qt',
        'ArmenterosDaughter2Qt'
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector(train_data)
    general_options.m_treename = tree_name
    general_options.m_identifier = mva_identifier
    general_options.m_variables = basf2_mva.vector(*trainVars)
    general_options.m_target_variable = target_variable
    general_options.m_max_events = options.get('max_events', 0)

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "lightgbm"

    # Default hyperparameters; user supplied entries take precedence.
    param = {'num_leaves': 256,
             'learning_rate': 0.2,
             'device_type': "cpu",
             'deterministic': True,
             'metric': 'auc',
             'num_round': 100,
             # 'stop_round' : 30,
             'path': mva_identifier+'.txt',
             'max_bin': 250,
             'boosting': 'dart',
             'trainFraction': 0.8,
             'min_data_in_leaf': 300,
             'max_depth': 8,
             'objective': 'cross_entropy',
             'num_threads': 1
             }
    if isinstance(parameters, dict):
        param.update(parameters)
    config_string = json.dumps(param)
    print("The json config string", config_string)
    python_options.m_config = config_string

    python_options.m_training_fraction = 1
    # NOTE(review): the original comment said "we do it inside MVA torch" although
    # the framework selected above is lightgbm - confirm no normalisation is wanted.
    python_options.m_normalize = False
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0

    basf2_mva.teacher(general_options, python_options)
272
273# ****************************************
274# KS Selector MAIN FUNCTION
275# ****************************************
276
277
def ksSelector(
    particleListName,
    identifier_Ks="Ks_LGBM_V0Selector_MC16",
    identifier_vLambda="Ks_LGBM_LambdaVeto_MC16",
    output_label_name='',
    extraInfoName_V0Selector='KsSelector_V0Selector',
    extraInfoName_LambdaVeto='KsSelector_LambdaVeto',
    useCustomThreshold=False,
    threshold_V0Selector=0.92,
    threshold_LambdaVeto=0.11,
    path=None
):
    """
    This function will apply K_S0 selection MVA on the given particleList.
    By default this function appends MVA output as a extraInfo for the given particleList.
    You can apply preset cut or custom cut by giving parameters. In this case,
    a new particleList is created from the original particleList applying cuts on the MVA output.

    The function does not return a value; the name of the resulting particleList is
    only reported through B2INFO. Invalid combinations of ``output_label_name`` and
    ``useCustomThreshold`` are reported with B2ERROR and no new particleList is created.

    @param particleListName         Reconstructed Ks -> pi+ pi- list.
    @param output_label_name        Label of the returned Ks particleList.
                                    When empty '', no cut is applied and new particleList is not created.
                                    When custom name, the custom threshold is used, and useCustomThreshold
                                    must be True.
                                    When 'standard', 'tight', or 'loose', a cut with Ks efficiency
                                    90%, 95%, and 85% is applied.
    @param extraInfoName_V0Selector Variable name for V0Selector MVA output.
    @param extraInfoName_LambdaVeto Variable name for LambdaVeto MVA output.
    @param identifier_Ks            Identifier name for V0Selector weight file.
    @param identifier_vLambda       Identifier name for LambdaVeto weight file.
    @param useCustomThreshold       Flag whether threshold_V0Selector and threshold_LambdaVeto are used.
    @param threshold_V0Selector     Threshold for V0Selector.
    @param threshold_LambdaVeto     Threshold for LambdaVeto.
    @param path                     Basf2 path to execute. Must be given: the module is added to it
                                    unconditionally, so the default None fails.

    """

    add_default_ks_Selector_aliases()

    path.add_module('MVAMultipleExperts',
                    listNames=[particleListName],
                    extraInfoNames=[extraInfoName_V0Selector, extraInfoName_LambdaVeto],
                    identifiers=[identifier_Ks, identifier_vLambda])

    # Preset working points: label -> (V0Selector threshold, LambdaVeto threshold, log message).
    # NOTE(review): the docstring quotes 95% efficiency for 'tight' and 85% for 'loose',
    # although 'tight' has the higher thresholds - confirm the quoted efficiencies.
    preset_cuts = {
        'standard': (0.92, 0.11, 'KsSelector: Standard Cut for MC16 is applied on '),
        'tight': (0.98, 0.31, 'KsSelector: Tight Cut for MC16 is applied on '),
        'loose': (0.43, 0.02, 'KsSelector: Loose Cut for MC 16 is applied on '),
    }

    if useCustomThreshold:
        if output_label_name in preset_cuts:
            B2ERROR('KsSelector: Specify label name except for \'standard\', \'tight\', and \'loose\' '
                    'when you use custom threshold.')
            return
        if output_label_name == '':
            B2ERROR('KsSelector: Specify label name when you use custom threshold.')
            return
        outputListName = particleListName.split(':')[0] + ':' + output_label_name
        B2INFO('KsSelector: Custom Cut is applied on '+outputListName+'.')
        V0_thr = threshold_V0Selector
        Lambda_thr = threshold_LambdaVeto
    elif output_label_name in preset_cuts:
        outputListName = particleListName.split(':')[0] + ':' + output_label_name
        V0_thr, Lambda_thr, message = preset_cuts[output_label_name]
        B2INFO(message+outputListName+'.')
    elif output_label_name == '':
        # No cut requested: the MVA outputs are only attached as extraInfo.
        B2INFO('KsSelector: ParticleList '+particleListName+' is returned.')
        return
    else:
        B2ERROR('KsSelector: Label should be \'\', \'standard\', \'tight\', or \'loose\' if you do '
                'not apply custom threshold')
        return

    # Common tail for the custom and the preset cuts: copy the candidates passing both thresholds.
    B2INFO('KsSelector: Threshold is (' + str(V0_thr) + ', ' + str(Lambda_thr) + ')')
    cut_string = 'extraInfo('+extraInfoName_V0Selector+')>'+str(V0_thr) + \
        ' and extraInfo('+extraInfoName_LambdaVeto+')>'+str(Lambda_thr)
    ma.cutAndCopyLists(outputListName, particleListName, cut=cut_string, path=path)

    B2INFO('KsSelector: ParticleList '+outputListName+' is returned.')