14 import modularAnalysis
as ma
15 from variables
import variables
18 from ROOT
import Belle2
20 _TrainingMode = Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode
24 _chargednames = [
'pi',
'K',
'p',
'e',
'mu']
25 _pidnames = [
'pionID',
'kaonID',
'protonID',
'electronID',
'muonID']
26 _stdnames = [
'all',
'loose',
'loosepid',
'good',
'higheff']
27 _effnames = [
'95eff',
'90eff',
'85eff']
30 _mostLikelyList =
'mostlikely'
33 def _stdChargedEffCuts(particletype, listtype):
35 Provides the PID cut corresponding to a given efficiency percentile
37 @param particletype type of charged particle (pi, K, p, e, mu)
38 @param listtype efficiency percentile for the list (95eff, 90eff, 85eff)
41 particleindex = _chargednames.index(particletype)
42 effindex = _effnames.index(listtype)
45 effcuts = [[0.001, 0.019, 0.098],
47 [0.000, 0.043, 0.251],
48 [0.093, 0.301, 0.709],
49 [0.187, 0.418, 0.909]]
51 return effcuts[particleindex][effindex]
56 Function to prepare one of several standardized types of charged particle lists:
57 - 'all' with no cuts on track
58 - 'good' high purity lists for data studies
59 - 'loosepid' loose selections for skimming, PID cut only
60 - 'loose' loose selections for skimming
61 - 'higheff' high efficiency list with loose global ID cut for data studies
62 - 'mostlikely' list with the highest PID likelihood
63 Also the following lists, which may or may not be available depending on the release
64 - '99eff' with 99% selection efficiency (calculated for 1<p<4 GeV) and good track (MC only)
65 - '95eff' with 95% selection efficiency (calculated for 1<p<4 GeV) and good track (MC only)
66 - '90eff' with 90% selection efficiency (calculated for 1<p<4 GeV) and good track (MC only)
67 - '85eff' with 85% selection efficiency (calculated for 1<p<4 GeV) and good track (MC only)
69 @param particletype type of charged particle to make a list of
70 @param listtype name of standard list
71 @param path modules are added to this path
75 trackQuality =
'thetaInCDCAcceptance and nCDCHits>20'
76 ipCut =
'dr < 0.5 and abs(dz) < 2'
77 goodTrack = trackQuality +
' and ' + ipCut
79 if particletype
not in _chargednames:
80 b2.B2ERROR(
"The requested list is not a standard charged particle. Use one of pi, K, e, mu, p.")
83 ma.fillParticleList(particletype +
'+:all',
'',
True, path=path)
84 elif listtype ==
'good':
86 particletype +
'+:good',
87 _pidnames[_chargednames.index(particletype)] +
' > 0.5 and ' + goodTrack,
90 elif listtype ==
'loose':
92 particletype +
'+:loose',
93 _pidnames[_chargednames.index(particletype)] +
' > 0.1 and ' + goodTrack,
96 elif listtype ==
'loosepid':
98 particletype +
'+:loosepid',
99 _pidnames[_chargednames.index(particletype)] +
' > 0.1',
102 elif listtype ==
'higheff':
104 particletype +
'+:higheff',
105 _pidnames[_chargednames.index(particletype)] +
' > 0.002 and ' + goodTrack,
108 elif listtype
not in _effnames:
109 b2.B2ERROR(
"The requested list is not defined. Please refer to the stdCharged documentation.")
111 pidcut = _stdChargedEffCuts(particletype, listtype)
112 if 0.0 < pidcut < 1.0:
117 _pidnames[_chargednames.index(particletype)] +
125 b2.B2ERROR(
'The requested standard particle list ' + particletype +
126 '+:' + listtype +
' is not available in this release.')
129 def stdPi(listtype=_defaultlist, path=None):
131 Function to prepare standard pion lists, refer to `stdCharged` for details
133 @param listtype name of standard list
134 @param path modules are added to this path
139 def stdK(listtype=_defaultlist, path=None):
141 Function to prepare standard kaon lists, refer to `stdCharged` for details
143 @param listtype name of standard list
144 @param path modules are added to this path
149 def stdPr(listtype=_defaultlist, path=None):
151 Function to prepare standard proton lists, refer to `stdCharged` for details
153 @param listtype name of standard list
154 @param path modules are added to this path
165 channel_eff="combination",
166 channel_misid_pi="combination",
167 channel_misid_K="combination",
169 outputListLabel=None,
170 trainingModeMulticlass=_TrainingMode.c_Multiclass,
171 trainingModeBinary=_TrainingMode.c_Classification,
174 Function to prepare one of several standardized types of lepton (:math:`e,\\mu`) lists, with the following working points:
176 * 'FixedThresh05', PID cut of > 0.5 for each particle in the list.
177 * 'FixedThresh09', PID cut of > 0.9 for each particle in the list.
178 * 'FixedThresh095', PID cut of > 0.95 for each particle in the list.
179 * 'FixedThresh099', PID cut of > 0.99 for each particle in the list.
180 * 'UniformEff60' 60% lepton efficiency list, uniform in a given multi-dimensional parametrisation.
181 * 'UniformEff70' 70% lepton efficiency list, uniform in a given multi-dimensional parametrisation.
182 * 'UniformEff80' 80% lepton efficiency list, uniform in a given multi-dimensional parametrisation.
183 * 'UniformEff90' 90% lepton efficiency list, uniform in a given multi-dimensional parametrisation.
184 * 'UniformEff95' 95% lepton efficiency list, uniform in a given multi-dimensional parametrisation.
185 * 'UniformPiFR5EM1' 50% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
186 * 'UniformPiFR1EM1' 10% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
187 * 'UniformPiFR5EM2' 5% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
188 * 'UniformPiFR1EM2' 1% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
189 * 'UniformPiFR5EM3' 0.5% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
190 * 'UniformPiFR1EM3' 0.1% pion to lepton fake rate, uniform in a given multi-dimensional parametrisation.
192 The function creates a ``ParticleList``, selecting particles according to the chosen ``working_point``,
193 and decorates each candidate in the list with the nominal Data/MC :math:`\\ell` ID efficiency and
194 :math:`\\pi,K` fake rate correction factors and their stat, syst uncertainty, reading the info
195 from the Conditions Database (CDB) according to the chosen input global tag (GT).
198 Particles will **not** be selected if they are outside the Data/MC *efficiency* corrections' phase space coverage
199 for the given working point.
200 In fact, the threshold value for the PID cut in such cases is set to NaN.
203 At the moment, the only supported *binary* lepton identification is against the **pion** hypothesis.
204 This implies that, if binary classification is chosen, only :math:`\\pi` fake rate corrections will be added to the
205 resulting particle list.
208 pdgId (int): the lepton pdg code.
209 working_point (str): name of the chosen working point that defines the content of the list. Choose among the above values.
210 method (str): the PID method: 'likelihood' or 'bdt'.
211 classification (str): the type of classifier: 'binary' (one-vs-pion) or 'global' (one-vs-all).
212 lid_weights_gt (str): the name identifier of the global tag with the recommended Data/MC correction weights.
216 `Lepton ID Confluence page <https://confluence.desy.de/display/BI/Lepton+ID+Performance>`_
217 for info about the recommended global tags.
219 release (Optional[int]): the major release number of the data and MC campaigns considered.
220 If specified, it ensures the correct :math:`\\ell` ID variables are used.
224 `Lepton ID Confluence page <https://confluence.desy.de/display/BI/Lepton+ID+Performance>`_
225 for info about lepton identification variables and campaigns.
227 channel_eff (Optional[str]): the channel used to derive the :math:`\\ell` ID efficiency corrections.
228 By default, 'combination' is set, meaning they are obtained by combining results
229 of several hadronic and low multiplicity channels, wherever they overlap.
233 `Lepton ID Confluence page <https://confluence.desy.de/display/BI/Lepton+ID+Performance>`_
234 for other possible choices (if any).
236 channel_misid_pi (Optional[str]): the channel used to derive the :math:`\\pi` fake rate corrections.
237 channel_misid_K (Optional[str]): the channel used to derive the :math:`K` fake rate corrections.
238 inputListName (Optional[str]): the name of a pre-existing ``ParticleList`` object (defined as a full ``decayString``,
239 e.g. 'e-:my_input_electrons') of which the standard lepton list will be a subset.
240 For instance, users might want to apply a Bremsstrahlung correction to electrons first,
241 which modifies their 4-momentum, and only later define the subset passing the PID selection,
242 including the appropriate PID weights and uncertainties (which are :math:`p`-dependent).
243 By default, the standard lepton list is created from all ``Track`` objects in the event.
246 Do **not** apply any PID selection on the input list, otherwise results could be biased.
248 outputListLabel (Optional[str]): the name of the output lepton list label, i.e.,
249 the string that follows the particle identifier ('e-:', 'mu-:').
250 By default, it is assigned as:
251 ``'{method}_{classification}_{working_point}'``.
253 trainingModeMulticlass (Optional[``Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode``]): enum identifier
254 of the multi-class (global PID) training mode.
255 See `modularAnalysis.applyChargedPidMVA` docs for available options.
256 trainingModeBinary (Optional[``Belle2.ChargedPidMVAWeights.ChargedPidMVATrainingMode``]): enum identifier
257 of the classification (binary PID) training mode.
258 See `modularAnalysis.applyChargedPidMVA` docs for available options.
259 path (basf2.Path): modules are added to this path.
262 tuple(str, list(str)): the alias for the lepton ID variable, and the list of aliases for the weights.
283 available_methods = (
"likelihood",
"bdt")
284 available_classificators = (
"global",
"binary")
286 if working_point
not in working_points:
287 b2.B2ERROR(f
"The requested lepton list working point: {working_point} is not defined. \
288 Please refer to the stdLep and stdCharged documentation.")
291 if method
not in available_methods:
292 b2.B2ERROR(f
"method: {method}. Must be any of: {available_methods}.")
295 if classification
not in available_classificators:
296 b2.B2ERROR(f
"classification: {classification}. Must be any of: {available_classificators}.")
299 b2.B2INFO(f
"Prepending GT with LID corrections: {lid_weights_gt}")
300 b2.conditions.prepend_globaltag(lid_weights_gt)
306 electron = Const.electron.getPDGCode()
307 muon = Const.muon.getPDGCode()
308 pion = Const.pion.getPDGCode()
310 if lepton
not in (electron, muon):
311 b2.B2ERROR(f
"{pdgId} is not that of a light charged lepton.")
319 electron:
"electronID",
323 electron:
"electronID",
329 electron: f
"binaryPID({electron}, {pion})",
330 muon: f
"binaryPID({muon}, {pion})",
333 electron:
"binaryPID_e_pi",
334 muon:
"binaryPID_mu_pi",
341 lepton: f
"pidChargedBDTScore({lepton}, ALL)",
344 lepton: re.sub(
r"\W+",
"", f
"pidChargedBDTScore_{lepton_name}"),
349 lepton: f
"pidPairChargedBDTScore({lepton}, {pion}, ALL)",
352 lepton: re.sub(
r"\W+",
"", f
"pidPairChargedBDTScore_{lepton_name}_pi"),
360 if int(release)
in [5, 6]:
361 if lepton == electron:
362 b2.B2INFO(f
"The likelihood-based electron ID in release {release} samples is defined w/o the SVD and the TOP")
363 pid_variables[
"likelihood"][
"global"][
"var"][electron] =
"electronID_noSVD_noTOP"
364 pid_variables[
"likelihood"][
"global"][
"alias"][electron] =
"electronID_noSVD_noTOP"
365 pid_variables[
"likelihood"][
"binary"][
"var"][electron] = f
"binaryElectronID_noSVD_noTOP({pion})"
366 pid_variables[
"likelihood"][
"binary"][
"alias"][electron] =
"binaryElectronID_noSVD_noTOP_pi"
368 b2.B2INFO(f
"The likelihood-based muon ID in release {release} samples is defined w/o the SVD")
369 pid_variables[
"likelihood"][
"global"][
"var"][muon] =
"muonID_noSVD"
370 pid_variables[
"likelihood"][
"global"][
"alias"][muon] =
"muonID_noSVD"
371 pid_variables[
"likelihood"][
"binary"][
"var"][muon] = f
"binaryPID_noSVD({muon}, {pion})"
372 pid_variables[
"likelihood"][
"binary"][
"alias"][muon] =
"binaryMuonID_noSVD_pi"
375 pid_var = pid_variables[method][classification][
"var"][lepton]
376 pid_alias = pid_variables[method][classification][
"alias"][lepton]
377 if pid_alias != pid_var:
378 variables.addAlias(pid_alias, pid_var)
381 outputListName = f
"{lepton_name}:{method}_{classification}_{working_point}"
382 if outputListLabel
is not None:
383 outputListName = f
"{lepton_name}:{outputListLabel}"
385 if inputListName
is None:
386 ma.fillParticleList(outputListName,
"", path=path)
389 f
"The standard lepton list: '{outputListName}' will be created as a subset \
390 of the following ParticleList: '{inputListName}'")
391 ma.copyList(outputListName, inputListName, path=path)
395 if classification ==
"global":
396 ma.applyChargedPidMVA(particleLists=[outputListName],
398 trainingMode=trainingModeMulticlass)
399 elif classification ==
"binary":
400 ma.applyChargedPidMVA(particleLists=[outputListName],
402 binaryHypoPDGCodes=(lepton, pion),
403 trainingMode=trainingModeBinary)
406 payload_eff = f
"ParticleReweighting:{pid_alias}_eff_{channel_eff}_{working_point}"
407 payload_misid_pi = f
"ParticleReweighting:{pid_alias}_misid_pi_{channel_misid_pi}_{working_point}"
408 payload_misid_K = f
"ParticleReweighting:{pid_alias}_misid_K_{channel_misid_K}_{working_point}"
411 path.add_module(
"ParticleWeighting",
412 particleList=outputListName,
413 tableName=payload_eff).set_name(f
"ParticleWeighting_eff_{outputListName}")
414 path.add_module(
"ParticleWeighting",
415 particleList=outputListName,
416 tableName=payload_misid_pi).set_name(f
"ParticleWeighting_misid_pi_{outputListName}")
417 if classification ==
"global":
418 path.add_module(
"ParticleWeighting",
419 particleList=outputListName,
420 tableName=payload_misid_K).set_name(f
"ParticleWeighting_misid_K_{outputListName}")
424 cut = f
"[{pid_alias} >= extraInfo({payload_eff}_threshold)]"
425 ma.applyCuts(outputListName, cut, path=path)
428 weight_aliases_to_var = {
429 f
"weight_{pid_alias}_eff_{working_point}": f
"extraInfo({payload_eff}_data_MC_ratio)",
430 f
"weight_{pid_alias}_misid_pi_{working_point}": f
"extraInfo({payload_misid_pi}_data_MC_ratio)",
432 f
"weight_{pid_alias}_eff_{working_point}_stat_up": f
"extraInfo({payload_eff}_data_MC_uncertainty_stat_up)",
433 f
"weight_{pid_alias}_eff_{working_point}_stat_dn": f
"extraInfo({payload_eff}_data_MC_uncertainty_stat_dn)",
434 f
"weight_{pid_alias}_eff_{working_point}_sys_up": f
"extraInfo({payload_eff}_data_MC_uncertainty_sys_up)",
435 f
"weight_{pid_alias}_eff_{working_point}_sys_dn": f
"extraInfo({payload_eff}_data_MC_uncertainty_sys_dn)",
436 f
"weight_{pid_alias}_misid_pi_{working_point}_stat_up": f
"extraInfo({payload_misid_pi}_data_MC_uncertainty_stat_up)",
437 f
"weight_{pid_alias}_misid_pi_{working_point}_stat_dn": f
"extraInfo({payload_misid_pi}_data_MC_uncertainty_stat_dn)",
438 f
"weight_{pid_alias}_misid_pi_{working_point}_sys_up": f
"extraInfo({payload_misid_pi}_data_MC_uncertainty_sys_up)",
439 f
"weight_{pid_alias}_misid_pi_{working_point}_sys_dn": f
"extraInfo({payload_misid_pi}_data_MC_uncertainty_sys_dn)",
448 f
"weight_{pid_alias}_eff_{working_point}_rel_stat_up":
449 f
"formula(1+[extraInfo({payload_eff}_data_MC_uncertainty_stat_up)/extraInfo({payload_eff}_data_MC_ratio)])",
450 f
"weight_{pid_alias}_eff_{working_point}_rel_stat_dn":
451 f
"formula(1-[extraInfo({payload_eff}_data_MC_uncertainty_stat_dn)/extraInfo({payload_eff}_data_MC_ratio)])",
452 f
"weight_{pid_alias}_eff_{working_point}_rel_sys_up":
453 f
"formula(1+[extraInfo({payload_eff}_data_MC_uncertainty_sys_up)/extraInfo({payload_eff}_data_MC_ratio)])",
454 f
"weight_{pid_alias}_eff_{working_point}_rel_sys_dn":
455 f
"formula(1-[extraInfo({payload_eff}_data_MC_uncertainty_sys_dn)/extraInfo({payload_eff}_data_MC_ratio)])",
456 f
"weight_{pid_alias}_misid_pi_{working_point}_rel_stat_up":
457 f
"formula(1+[extraInfo({payload_misid_pi}_data_MC_uncertainty_stat_up)/extraInfo({payload_misid_pi}_data_MC_ratio)])",
458 f
"weight_{pid_alias}_misid_pi_{working_point}_rel_stat_dn":
459 f
"formula(1-[extraInfo({payload_misid_pi}_data_MC_uncertainty_stat_dn)/extraInfo({payload_misid_pi}_data_MC_ratio)])",
460 f
"weight_{pid_alias}_misid_pi_{working_point}_rel_sys_up":
461 f
"formula(1+[extraInfo({payload_misid_pi}_data_MC_uncertainty_sys_up)/extraInfo({payload_misid_pi}_data_MC_ratio)])",
462 f
"weight_{pid_alias}_misid_pi_{working_point}_rel_sys_dn":
463 f
"formula(1-[extraInfo({payload_misid_pi}_data_MC_uncertainty_sys_dn)/extraInfo({payload_misid_pi}_data_MC_ratio)])",
465 if classification ==
"global":
466 weight_aliases_to_var.update({
467 f
"weight_{pid_alias}_misid_K_{working_point}": f
"extraInfo({payload_misid_K}_data_MC_ratio)",
469 f
"weight_{pid_alias}_misid_K_{working_point}_stat_up": f
"extraInfo({payload_misid_K}_data_MC_uncertainty_stat_up)",
470 f
"weight_{pid_alias}_misid_K_{working_point}_stat_dn": f
"extraInfo({payload_misid_K}_data_MC_uncertainty_stat_dn)",
471 f
"weight_{pid_alias}_misid_K_{working_point}_sys_up": f
"extraInfo({payload_misid_K}_data_MC_uncertainty_sys_up)",
472 f
"weight_{pid_alias}_misid_K_{working_point}_sys_dn": f
"extraInfo({payload_misid_K}_data_MC_uncertainty_sys_dn)",
474 f
"weight_{pid_alias}_misid_K_{working_point}_rel_stat_up":
475 f
"formula(1+[extraInfo({payload_misid_K}_data_MC_uncertainty_stat_up)/extraInfo({payload_misid_K}_data_MC_ratio)])",
476 f
"weight_{pid_alias}_misid_K_{working_point}_rel_stat_dn":
477 f
"formula(1-[extraInfo({payload_misid_K}_data_MC_uncertainty_stat_dn)/extraInfo({payload_misid_K}_data_MC_ratio)])",
478 f
"weight_{pid_alias}_misid_K_{working_point}_rel_sys_up":
479 f
"formula(1+[extraInfo({payload_misid_K}_data_MC_uncertainty_sys_up)/extraInfo({payload_misid_K}_data_MC_ratio)])",
480 f
"weight_{pid_alias}_misid_K_{working_point}_rel_sys_dn":
481 f
"formula(1-[extraInfo({payload_misid_K}_data_MC_uncertainty_sys_dn)/extraInfo({payload_misid_K}_data_MC_ratio)])",
484 for alias, var
in weight_aliases_to_var.items():
485 variables.addAlias(alias, var)
487 return pid_alias, list(weight_aliases_to_var.keys())
490 def stdE(listtype=_defaultlist,
495 channel_eff="combination",
496 channel_misid_pi="combination",
497 channel_misid_K="combination",
499 outputListLabel=None,
500 trainingModeMulticlass=_TrainingMode.c_Multiclass,
501 trainingModeBinary=_TrainingMode.c_Classification,
503 """ Function to prepare one of several standardized types of electron lists.
504 See the documentation of `stdLep` for details.
506 It also accepts any of the legacy definitions
507 for the ``listtype`` parameter (aka ``working_point`` in `stdLep`) to fall back to the `stdCharged` behaviour:
519 tuple(str, list(str)): the alias for the electron ID variable, and the list of aliases for the weights.
522 if listtype
in _stdnames + _effnames:
524 return _pidnames[_chargednames.index(
"e")],
None
526 return stdLep(Const.electron.getPDGCode(), listtype, method, classification, lid_weights_gt,
528 channel_eff=channel_eff,
529 channel_misid_pi=channel_misid_pi,
530 channel_misid_K=channel_misid_K,
531 inputListName=inputListName,
532 outputListLabel=outputListLabel,
533 trainingModeMulticlass=trainingModeMulticlass,
534 trainingModeBinary=trainingModeBinary,
538 def stdMu(listtype=_defaultlist,
543 channel_eff="combination",
544 channel_misid_pi="combination",
545 channel_misid_K="combination",
547 outputListLabel=None,
548 trainingModeMulticlass=_TrainingMode.c_Multiclass,
549 trainingModeBinary=_TrainingMode.c_Classification,
551 """ Function to prepare one of several standardized types of muon lists.
552 See the documentation of `stdLep` for details.
554 It also accepts any of the legacy definitions
555 for the ``listtype`` parameter (aka ``working_point`` in `stdLep`) to fall back to the `stdCharged` behaviour:
567 tuple(str, list(str)): the alias for the muon ID variable, and the list of aliases for the weights.
570 if listtype
in _stdnames + _effnames:
572 return _pidnames[_chargednames.index(
"mu")],
None
574 return stdLep(Const.muon.getPDGCode(), listtype, method, classification, lid_weights_gt,
576 channel_eff=channel_eff,
577 channel_misid_pi=channel_misid_pi,
578 channel_misid_K=channel_misid_K,
579 inputListName=inputListName,
580 outputListLabel=outputListLabel,
581 trainingModeMulticlass=trainingModeMulticlass,
582 trainingModeBinary=trainingModeBinary,
586 def stdMostLikely(pidPriors=None, suffix='', custom_cuts='', path=None):
588 Function to prepare most likely particle lists according to PID likelihood, refer to stdCharged for details
590 @param pidPriors list of 6 float numbers used to reweight PID likelihoods, for e, mu, pi, K, p and d
591 @param suffix string added to the end of particle list names
592 @param custom_cuts custom selection cut string, if empty, standard track quality cuts will be applied
593 @param path modules are added to this path
599 if pidPriors
is not None:
600 args = str(pidPriors)[1:-1]
601 trackQuality =
'thetaInCDCAcceptance and nCDCHits>20'
602 if custom_cuts !=
'':
603 trackQuality = custom_cuts
604 for name
in _chargednames:
605 ma.fillParticleList(f
'{name}+:{_mostLikelyList}{suffix}',
606 f
'pidIsMostLikely({args}) > 0 and {trackQuality}',
True, path=path)
This class provides a set of constants for the framework.