Belle II Software development
ksSelector.py
1#!/usr/bin/env python3
2
9
10from basf2 import B2INFO, B2ERROR
11import basf2_mva
12import modularAnalysis as ma
13import variables
14from variables import utils
15
16
def add_default_ks_Selector_aliases():
    """
    Register the variable aliases needed by the ks Selector.

    Each (alias, expression) pair of the table below is passed to
    ``variables.variables.addAlias`` in the order in which it is listed.
    """
    B2INFO('KsSelector: creating variables alias.')

    # (alias name, variable expression the alias stands for)
    alias_table = (
        ('M_lambda_p', 'useAlternativeDaughterHypothesis(M, 0:p+)'),
        ('M_lambda_antip', 'useAlternativeDaughterHypothesis(M, 1:anti-p-)'),
        ('daughtersDeltaZ', 'daughterDiffOf(0, 1, dz)'),
        ('cosVertexMomentum', 'cosAngleBetweenMomentumAndVertexVector'),
        ('pip_nPXDHits', 'daughter(0,nPXDHits)'),
        ('pin_nPXDHits', 'daughter(1,nPXDHits)'),
        ('pip_nSVDHits', 'daughter(0,nSVDHits)'),
        ('pin_nSVDHits', 'daughter(1,nSVDHits)'),
        ('daughterAngleDiffInMother', 'useRestFrame(daughterAngle(0, 1))'),
        ('pip_p', 'daughter(0,p)'),
        ('pin_p', 'daughter(1,p)'),
        ('pip_dr', 'daughter(0,dr)'),
        ('pin_dr', 'daughter(1,dr)'),
        ('pip_cosTheta', 'daughter(0,cosTheta)'),
        ('pin_cosTheta', 'daughter(1,cosTheta)'),
        ('pip_protonID', 'daughter(0,protonID)'),
        ('pin_protonID', 'daughter(1,protonID)'),
    )

    register_alias = variables.variables.addAlias
    for alias_name, expression in alias_table:
        register_alias(alias_name, expression)
39
40
def add_variable_collection():
    """
    Create the variable collection ``ks_selector_info`` for ksSelector.

    The ks Selector aliases are set up first, then the variable names listed
    below are registered as one collection via ``utils.add_collection``.
    """
    add_default_ks_Selector_aliases()

    # The collection is assembled in chunks; the resulting order is the one
    # handed to add_collection.
    collection_members = [
        'cosVertexMomentum',
        'flightDistance',
        'significanceOfDistance',
        'cosHelicityAngleMomentum',
        'ImpactXY',
    ]
    collection_members += [
        'decayAngle(0)',
        'decayAngle(1)',
        'daughterAngleDiffInMother',
        'daughtersDeltaZ',
    ]
    collection_members += [
        'pip_nSVDHits',
        'pip_nPXDHits',
        'pin_nSVDHits',
        'pin_nPXDHits',
        'pip_dr',
        'pin_dr',
        'pip_protonID',
        'pin_protonID',
    ]
    collection_members += [
        'M_lambda_p',
        'M_lambda_antip',
        'pip_p',
        'pin_p',
        'pip_cosTheta',
        'pin_cosTheta',
    ]
    collection_members += [
        'ArmenterosLongitudinalMomentumAsymmetry',
        'ArmenterosDaughter1Qt',
        'ArmenterosDaughter2Qt',
    ]

    utils.add_collection(collection_members, 'ks_selector_info')
68
69
def V0Selector_Training(
    train_data,
    tree_name="tree",
    mva_identifier="MVAFastBDT_V0Selector.root",
    target_variable="isSignal",
    parameters=None,
    options=None
):
    """
    Defines the configuration of V0Selector Training and runs the basf2_mva teacher.
    The training data should contain K_S0 and misreconstructed K_S0 without Lambda0.

    @param train_data           Root file containing Ks information to be trained.
    @param tree_name            Tree name for variables.
    @param mva_identifier       Name for output MVA weight file.
    @param target_variable      Target variable for MVA training.
    @param parameters           Hyperparameters for LGBM. A dict is merged over the default
                                configuration defined in this function (its entries win);
                                None or any non-dict value leaves the defaults unchanged.
    @param options              MVA options. Only the key 'max_events' is read (default 0);
                                None is treated as an empty dict.
    """
    import json

    # None instead of a shared mutable {} default; behaviour for callers is unchanged.
    if options is None:
        options = {}

    trainVars = [
        'cosVertexMomentum',
        'flightDistance',
        'significanceOfDistance',
        'cosHelicityAngleMomentum',
        'ImpactXY',
        'decayAngle(0)',
        'decayAngle(1)',
        'daughterAngleDiffInMother',
        'daughtersDeltaZ',
        'pip_nSVDHits', 'pin_nSVDHits',
        'pip_dr', 'pin_dr',
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector(train_data)
    general_options.m_treename = tree_name
    general_options.m_identifier = mva_identifier
    general_options.m_variables = basf2_mva.vector(*trainVars)
    general_options.m_target_variable = target_variable
    # NOTE(review): 0 presumably means "no limit on the number of events" - confirm
    # against the basf2_mva GeneralOptions documentation.
    general_options.m_max_events = options.get('max_events', 0)

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "lightgbm"

    # Default configuration; serialised to JSON and handed over via m_config.
    param = {'num_leaves': 256,
             'learning_rate': 0.1,
             'device_type': "cpu",
             'deterministic': True,
             'metric': 'auc',
             'num_round': 100,
             # 'stop_round' : 30,
             'path': mva_identifier+'.txt',
             'max_bin': 250,
             'boosting': 'gbdt',
             'trainFraction': 0.8,
             'min_data_in_leaf': 4000,
             'max_depth': 8,
             'objective': 'cross_entropy',
             'num_threads': 1
             }
    # User supplied hyperparameters override the defaults above.
    if isinstance(parameters, dict):
        param.update(parameters)
    config_string = json.dumps(param)
    print("The json config string", config_string)
    python_options.m_config = config_string

    python_options.m_training_fraction = 1
    # Original note read "we do it inside MVA torch".
    # NOTE(review): the framework configured here is lightgbm - confirm the intent.
    python_options.m_normalize = False
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0

    basf2_mva.teacher(general_options, python_options)
143
144
def LambdaVeto_Training(
    train_data,
    tree_name="tree",
    mva_identifier="MVAFastBDT_LambdaVeto.root",
    target_variable="isSignal",
    parameters=None,
    options=None
):
    """
    Defines the configuration of LambdaVeto Training and runs the basf2_mva teacher.
    The training data should contain only K_S0 and Lambda0.

    @param train_data           Root file containing Ks information to be trained.
    @param tree_name            Tree name for variables.
    @param mva_identifier       Name for output MVA weight file.
    @param target_variable      Target variable for MVA training.
    @param parameters           Hyperparameters for LGBM. A dict is merged over the default
                                configuration defined in this function (its entries win);
                                None or any non-dict value leaves the defaults unchanged.
    @param options              MVA options. Only the key 'max_events' is read (default 0);
                                None is treated as an empty dict.
    """
    import json

    # None instead of a shared mutable {} default; behaviour for callers is unchanged.
    if options is None:
        options = {}

    trainVars = [
        'pip_protonID',
        'pin_protonID',
        'M_lambda_p',
        'M_lambda_antip',
        'pip_cosTheta',
        'pin_cosTheta',
        'ArmenterosLongitudinalMomentumAsymmetry',
        'ArmenterosDaughter1Qt',
        'ArmenterosDaughter2Qt'
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector(train_data)
    general_options.m_treename = tree_name
    general_options.m_identifier = mva_identifier
    general_options.m_variables = basf2_mva.vector(*trainVars)
    general_options.m_target_variable = target_variable
    # NOTE(review): 0 presumably means "no limit on the number of events" - confirm
    # against the basf2_mva GeneralOptions documentation.
    general_options.m_max_events = options.get('max_events', 0)

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "lightgbm"

    # Default configuration; serialised to JSON and handed over via m_config.
    param = {'num_leaves': 256,
             'learning_rate': 0.2,
             'device_type': "cpu",
             'deterministic': True,
             'metric': 'auc',
             'num_round': 100,
             # 'stop_round' : 30,
             'path': mva_identifier+'.txt',
             'max_bin': 250,
             'boosting': 'dart',
             'trainFraction': 0.8,
             'min_data_in_leaf': 300,
             'max_depth': 8,
             'objective': 'cross_entropy',
             'num_threads': 1
             }
    # User supplied hyperparameters override the defaults above.
    if isinstance(parameters, dict):
        param.update(parameters)
    config_string = json.dumps(param)
    print("The json config string", config_string)
    python_options.m_config = config_string

    python_options.m_training_fraction = 1
    # Original note read "we do it inside MVA torch".
    # NOTE(review): the framework configured here is lightgbm - confirm the intent.
    python_options.m_normalize = False
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0

    basf2_mva.teacher(general_options, python_options)
216
217# ****************************************
218# KS Selector MAIN FUNCTION
219# ****************************************
220
221
def ksSelector(
    particleListName,
    identifier_Ks="Ks_LGBM_V0Selector",
    identifier_vLambda="Ks_LGBM_LambdaVeto",
    output_label_name='',
    extraInfoName_V0Selector='KsSelector_V0Selector',
    extraInfoName_LambdaVeto='KsSelector_LambdaVeto',
    useCustomThreshold=False,
    threshold_V0Selector=0.90,
    threshold_LambdaVeto=0.11,
    path=None
):
    """
    This function will apply K_S0 selection MVA on the given particleList.
    By default this function appends MVA output as a extraInfo for the given particleList.
    You can apply preset cut or custom cut by giving parameters. In this case,
    a new particleList is created from the original particleList applying cuts on the MVA output.

    The function itself returns nothing; the name of the resulting particleList
    (``<particle name of particleListName>:<output_label_name>``) is only written to the log.

    @param particleListName             Reconstructed Ks -> pi+ pi- list.
    @param output_label_name            Label of the returned Ks particleList.
                                        When empty '', no cut is applied and new particleList is not created.
                                        When custom name, the custom threshold is used, and useCustomThreshold
                                        must be True.
                                        When 'standard', 'tight', or 'loose', a cut with Ks efficiency
                                        90%, 95%, and 85% is applied.
    @param extraInfoName_V0Selector     Variable name for V0Selector MVA output.
    @param extraInfoName_LambdaVeto     Variable name for LambdaVeto MVA output.
    @param identifier_Ks                Identifier name for V0Selector weight file.
    @param identifier_vLambda           Identifier name for LambdaVeto weight file.
    @param useCustomThreshold           Flag whether threshold_V0Selector and threshold_LambdaVeto are used.
    @param threshold_V0Selector         Threshold for V0Selector.
    @param threshold_LambdaVeto         Threshold for LambdaVeto.
    @param path                         Basf2 path to execute. Has to be given: the MVA module is
                                        added with ``path.add_module``.

    """

    add_default_ks_Selector_aliases()

    path.add_module('MVAMultipleExperts',
                    listNames=[particleListName],
                    extraInfoNames=[extraInfoName_V0Selector, extraInfoName_LambdaVeto],
                    identifiers=[identifier_Ks, identifier_vLambda])

    # Preset label -> (word used in the log message, V0Selector threshold, LambdaVeto threshold).
    # NOTE(review): the efficiencies quoted in the docstring cannot be checked from this code.
    preset_cuts = {
        'standard': ('Standard', 0.91, 0.19),
        'tight': ('Tight', 0.97, 0.45),
        'loose': ('Loose', 0.51, 0.02),
    }

    def _cut_to_labelled_list(cut_title, V0_thr, Lambda_thr):
        """Copy candidates passing both MVA cuts into '<particle>:<label>' and return that list name."""
        labelledListName = particleListName.split(':')[0] + ':' + output_label_name
        B2INFO('KsSelector: ' + cut_title + ' Cut is applied on ' + labelledListName + '.')
        B2INFO('KsSelector: Threshold is (' + str(V0_thr) + ', ' + str(Lambda_thr) + ')')
        cut_string = 'extraInfo(' + extraInfoName_V0Selector + ')>' + str(V0_thr) + \
            ' and extraInfo(' + extraInfoName_LambdaVeto + ')>' + str(Lambda_thr)
        ma.cutAndCopyLists(labelledListName, particleListName, cut=cut_string, path=path)
        return labelledListName

    # Stays empty when the label/flag combination is rejected below.
    outputListName = ''

    if useCustomThreshold:
        if output_label_name in preset_cuts:
            B2ERROR('KsSelector: Specify label name except for \'standard\', \'tight\', and \'loose\' '
                    'when you use custom threshold.')
        elif output_label_name == '':
            B2ERROR('KsSelector: Specify label name when you use custom threshold.')
        else:
            outputListName = _cut_to_labelled_list('Custom', threshold_V0Selector, threshold_LambdaVeto)
    elif output_label_name in preset_cuts:
        cut_title, V0_thr, Lambda_thr = preset_cuts[output_label_name]
        outputListName = _cut_to_labelled_list(cut_title, V0_thr, Lambda_thr)
    elif output_label_name == '':
        # No cut requested: the input list only carries the MVA outputs as extraInfo.
        outputListName = particleListName
    else:
        # The space after "do" was missing in the original concatenated literals.
        B2ERROR('KsSelector: Label should be \'\', \'standard\', \'tight\', or \'loose\' if you do '
                'not apply custom threshold')

    B2INFO('KsSelector: ParticleList '+outputListName+' is returned.')