12 The Full Event Interpretation Algorithm
15 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
16 - It can be used for hadronic and semileptonic tagging.
17 - The algorithm has to be trained on MC, and can afterwards be applied on data.
18 - The training requires O(100) million MC events
19 - The weight files are stored in the Belle II Condition database
21 Read this file if you want to understand the technical details of the FEI.
23 The FEI follows a hierarchical approach.
25 (Stage -1: Write out information about the provided data sample)
26 Stage 0: Final State Particles (FSP)
27 Stage 1: pi0, J/Psi, Lambda0
29 Stage 3: D and Lambda_c mesons
34 Most stages consists of:
35 - Create Particle Candidates
38 - Apply a multivariate classification method
41 The FEI will reconstruct these 7 stages during the training phase,
42 since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
43 to train all the necessary multivariate classifiers.
48from basf2
import B2INFO, B2WARNING, B2ERROR
50import modularAnalysis
as ma
70FeiState = collections.namedtuple(
'FeiState',
'path, stage, plists, fsplists, excludelists')
75 Contains the relevant information about the used training data.
76 Basically we write out the number of MC particles in the whole dataset.
77 This numbers we can use to calculate what fraction of candidates we have to write
78 out as TrainingData to get a reasonable amount of candidates to train on
79 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
80 Secondly we can use this information for the generation of the monitoring pdfs,
81 where we calculate reconstruction efficiencies.
86 Create a new TrainingData object
87 @param particles list of config.Particle objects
88 @param outputPath path to the output directory
93 self.
filename = os.path.join(outputPath,
'mcParticlesCount.root')
97 Check if the relevant information is already available
103 Returns pybasf2.Path which counts the number of MCParticles in each event.
104 @param particles list of config.Particle objects
109 path = basf2.create_path()
110 module = basf2.register_module(
'VariablesToHistogram')
111 module.set_name(
"VariablesToHistogram_MCCount")
112 module.param(
'variables', [(f
'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5)
for pdg
in pdgs])
113 module.param(
'fileName', self.
filename)
114 module.param(
'ignoreCommandLineOverride',
True)
115 path.add_module(module)
120 Read out the number of MC particles from the file created by reconstruct
125 root_file = ROOT.TFile.Open(self.
filename,
'read')
128 for key
in root_file.GetListOfKeys():
129 variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
130 pdg = abs(int(variable[len(
'NumberOfMCParticlesInEvent('):-len(
")")]))
133 mc_counts[pdg][
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
134 for bin
in range(hist.GetNbinsX()))
135 mc_counts[pdg][
'std'] = hist.GetStdDev()
136 mc_counts[pdg][
'avg'] = hist.GetMean()
137 mc_counts[pdg][
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
138 mc_counts[pdg][
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
141 mc_counts[0][
'sum'] = hist.GetEntries()
148 Steers the loading of FSP particles.
149 This does NOT include RootInput, Geometry or anything required before loading FSPs,
150 the user has to add this himself (because it depends on the MC campaign and if you want
151 to use Belle or Belle II).
156 Create a new FSPLoader object
157 @param particles list of config.Particle objects
158 @param config config.FeiConfiguration object
167 Returns a list of FSP particle lists which are used in the FEI.
168 This is used to create the RootOutput module.
170 fsps = [
'K+:FSP',
'pi+:FSP',
'e+:FSP',
'mu+:FSP',
'p+:FSP',
'gamma:FSP',
'K_S0:V0',
'Lambda0:V0',
'K_L0:FSP',
'gamma:V0']
177 Returns pybasf2.Path which loads the FSP Particles
179 path = basf2.create_path()
182 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
183 (
'mu+:FSP',
''), (
'p+:FSP',
'')], writeOut=
True, path=path)
184 for outputList, inputList
in [(
'gamma:FSP',
'gamma:mdst'), (
'K_S0:V0',
'K_S0:mdst'),
185 (
'Lambda0:V0',
'Lambda0:mdst'), (
'K_L0:FSP',
'K_L0:mdst'),
186 (
'pi0:FSP',
'pi0:mdst'), (
'gamma:V0',
'gamma:v0mdst')]:
187 ma.copyParticles(outputList, inputList, writeOut=
True, path=path)
189 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
190 (
'mu+:FSP',
''), (
'gamma:FSP',
''),
191 (
'p+:FSP',
''), (
'K_L0:FSP',
'')], writeOut=
True, path=path)
192 ma.fillParticleList(
'K_S0:V0 -> pi+ pi-',
'', writeOut=
True, path=path)
193 ma.fillParticleList(
'Lambda0:V0 -> p+ pi-',
'', writeOut=
True, path=path)
194 ma.fillConvertedPhotonsList(
'gamma:V0 -> e+ e-',
'', writeOut=
True, path=path)
197 names = [
'e+',
'K+',
'pi+',
'mu+',
'gamma',
'K_S0',
'p+',
'K_L0',
'Lambda0',
'pi0']
198 filename = os.path.join(self.
config.monitoring_path,
'Monitor_FSPLoader.root')
200 variables = [(f
'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5)
for pdg
in pdgs]
201 ma.variablesToHistogram(
'', variables=variables, filename=filename, ignoreCommandLineOverride=
True, path=path)
207 Steers the creation of the training data.
208 The training data is used to train a multivariate classifier for each channel.
209 The training of the FEI at its core is just generating this training data for each channel.
210 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
214 mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
216 Create a new TrainingData object
217 @param particles list of config.Particle objects
218 @param config config.FeiConfiguration object
219 @param mc_counts containing number of MC Particles
230 Returns pybasf2.Path which creates the training data for the given particles
233 path = basf2.create_path()
238 print(f
"FEI-core: TrainingData: nSignal for {particle.name}: {nSignal}")
247 for channel
in particle.channels:
248 weightfile = f
'{channel.label}.xml'
249 if basf2_mva.available(weightfile):
250 B2INFO(f
"FEI-core: Skipping preparing Training Data for {weightfile}, already available")
252 filename =
'training_input.root'
255 nBackground = self.
mc_counts[0][
'sum'] * channel.preCutConfig.bestCandidateCut
256 inverseSamplingRates = {}
259 if nBackground > Teacher.MaximumNumberOfMVASamples
and not channel.preCutConfig.noBackgroundSampling:
260 inverseSamplingRates[0] = max(
261 1, int((int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1) * channel.preCutConfig.bkgSamplingFactor))
262 elif channel.preCutConfig.bkgSamplingFactor > 1:
263 inverseSamplingRates[0] = int(channel.preCutConfig.bkgSamplingFactor)
265 if nSignal > Teacher.MaximumNumberOfMVASamples
and not channel.preCutConfig.noSignalSampling:
266 inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1
268 spectators = [channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
269 if channel.mvaConfig.sPlotVariable
is not None:
270 spectators.append(channel.mvaConfig.sPlotVariable)
273 hist_variables = [
'mcErrors',
'mcParticleStatus'] + channel.mvaConfig.variables + spectators
274 hist_variables_2d = [(x, channel.mvaConfig.target)
275 for x
in channel.mvaConfig.variables + spectators
if x
is not channel.mvaConfig.target]
276 hist_filename = os.path.join(self.
config.monitoring_path,
'Monitor_TrainingData.root')
277 ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
278 variables_2d=config.variables2binnings_2d(hist_variables_2d),
279 filename=hist_filename,
280 ignoreCommandLineOverride=
True,
281 directory=config.removeJPsiSlash(f
'{channel.label}'), path=path)
283 teacher = basf2.register_module(
'VariablesToNtuple')
284 teacher.set_name(f
'VariablesToNtuple_{channel.name}')
285 teacher.param(
'fileName', filename)
286 teacher.param(
'treeName', ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables'))
287 teacher.param(
'variables', channel.mvaConfig.variables + spectators)
288 teacher.param(
'particleList', channel.name)
289 teacher.param(
'sampling', (channel.mvaConfig.target, inverseSamplingRates))
290 teacher.param(
'ignoreCommandLineOverride',
True)
291 path.add_module(teacher)
297 Steers the reconstruction phase before the mva method was applied
299 - The ParticleCombination (for each particle and channel we create candidates using
300 the daughter candidates from the previous stages)
302 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
303 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
304 this will speed up the whole FEI by a factor 2-3)
309 Create a new PreReconstruction object
310 @param particles list of config.Particle objects
311 @param config config.FeiConfiguration object
320 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
322 path = basf2.create_path()
325 for channel
in particle.channels:
328 channel.daughters[0].split(
':')[0]) ==
pdg.from_name(particle.name)):
329 ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=
True, path=path)
330 v2EI = basf2.register_module(
'VariablesToExtraInfo')
331 v2EI.set_name(f
'VariablesToExtraInfo_{channel.name}')
332 v2EI.param(
'particleList', channel.name)
333 v2EI.param(
'variables', {f
'constant({channel.decayModeID})':
'decayModeID'})
335 v2EI.set_log_level(basf2.logging.log_level.ERROR)
336 path.add_module(v2EI)
338 ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
339 writeOut=
True, path=path)
341 if "tag" in (channel.name).lower():
342 ma.matchTagTruth(channel.name, path=path)
344 ma.matchMCTruth(channel.name, path=path)
345 bc_variable = channel.preCutConfig.bestCandidateVariable
346 if self.
config.monitor ==
'simple':
347 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
348 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
350 hist_variables = [bc_variable,
'mcErrors',
'mcParticleStatus',
351 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
352 hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
353 (bc_variable,
'mcErrors'),
354 (bc_variable,
'mcParticleStatus')]
355 for specVar
in channel.mvaConfig.spectators:
356 hist_variables_2d.append((bc_variable, specVar))
357 hist_variables_2d.append((channel.mvaConfig.target, specVar))
358 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_BeforeRanking.root')
359 ma.variablesToHistogram(
361 variables=config.variables2binnings(hist_variables),
362 variables_2d=config.variables2binnings_2d(hist_variables_2d),
364 ignoreCommandLineOverride=
True,
365 directory=f
'{channel.label}',
368 if channel.preCutConfig.bestCandidateMode ==
'lowest':
369 ma.rankByLowest(channel.name,
370 channel.preCutConfig.bestCandidateVariable,
371 channel.preCutConfig.bestCandidateCut,
374 elif channel.preCutConfig.bestCandidateMode ==
'highest':
375 ma.rankByHighest(channel.name,
376 channel.preCutConfig.bestCandidateVariable,
377 channel.preCutConfig.bestCandidateCut,
381 raise RuntimeError(f
'Unknown bestCandidateMode {repr(channel.preCutConfig.bestCandidateMode)}')
383 if 'gamma' in channel.decayString
and channel.pi0veto:
384 ma.buildRestOfEvent(channel.name, path=path)
385 Ddaughter_roe_path = basf2.Path()
386 deadEndPath = basf2.Path()
387 ma.signalSideParticleFilter(channel.name,
'', Ddaughter_roe_path, deadEndPath)
388 ma.fillParticleList(
'gamma:roe',
'isInRestOfEvent == 1', path=Ddaughter_roe_path)
390 matches = list(re.finditer(
'gamma', channel.decayString))
392 for igamma
in range(len(matches)):
393 start, end = matches[igamma-1].span()
394 tempString = f
'{channel.decayString[:start]}^gamma{channel.decayString[end:]}'
395 ma.fillSignalSideParticleList(f
'gamma:sig_{igamma}', tempString, path=Ddaughter_roe_path)
396 ma.reconstructDecay(f
'pi0:veto_{igamma} -> gamma:sig_{igamma} gamma:roe',
'', path=Ddaughter_roe_path)
397 pi0lists.append(f
'pi0:veto_{igamma}')
398 ma.copyLists(
'pi0:veto', pi0lists, writeOut=
False, path=Ddaughter_roe_path)
399 ma.rankByLowest(
'pi0:veto',
'abs(dM)', 1, path=Ddaughter_roe_path)
400 ma.matchMCTruth(
'pi0:veto', path=Ddaughter_roe_path)
401 ma.variableToSignalSideExtraInfo(
404 'InvM':
'pi0vetoMass',
405 'formula((daughter(0,E)-daughter(1,E))/(daughter(0,E)+daughter(1,E)))':
'pi0vetoEnergyAsymmetry',
407 path=Ddaughter_roe_path
409 path.for_each(
'RestOfEvent',
'RestOfEvents', Ddaughter_roe_path)
412 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_AfterRanking.root')
413 if self.
config.monitor !=
'simple':
414 hist_variables += [
'extraInfo(preCut_rank)']
415 hist_variables_2d += [(
'extraInfo(preCut_rank)', channel.mvaConfig.target),
416 (
'extraInfo(preCut_rank)',
'mcErrors'),
417 (
'extraInfo(preCut_rank)',
'mcParticleStatus')]
418 for specVar
in channel.mvaConfig.spectators:
419 hist_variables_2d.append((
'extraInfo(preCut_rank)', specVar))
420 ma.variablesToHistogram(
422 variables=config.variables2binnings(hist_variables),
423 variables_2d=config.variables2binnings_2d(hist_variables_2d),
425 ignoreCommandLineOverride=
True,
426 directory=f
'{channel.label}',
430 elif self.
config.training:
431 if "tag" in (channel.name).lower():
432 ma.matchTagTruth(channel.name, path=path)
434 ma.matchMCTruth(channel.name, path=path)
437 pvfit = basf2.register_module(
'ParticleVertexFitter')
438 pvfit.set_name(f
'ParticleVertexFitter_{channel.name}')
439 pvfit.param(
'listName', channel.name)
440 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
441 pvfit.param(
'vertexFitter',
'KFit')
442 pvfit.param(
'fitType',
'vertex')
443 pvfit.set_log_level(basf2.logging.log_level.ERROR)
444 path.add_module(pvfit)
445 elif re.findall(
r"[\w']+", channel.decayString).count(
'pi0') > 1
and particle.name !=
'pi0':
446 basf2.B2INFO(f
"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
447 elif len(channel.daughters) > 1:
448 pvfit = basf2.register_module(
'ParticleVertexFitter')
449 pvfit.set_name(f
'ParticleVertexFitter_{channel.name}')
450 pvfit.param(
'listName', channel.name)
451 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
452 pvfit.param(
'vertexFitter',
'KFit')
453 if particle.name
in [
'pi0']:
454 pvfit.param(
'fitType',
'mass')
456 pvfit.param(
'fitType',
'vertex')
457 pvfit.set_log_level(basf2.logging.log_level.ERROR)
458 path.add_module(pvfit)
461 if self.
config.monitor ==
'simple':
462 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
463 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
465 hist_variables = [
'chiProb',
'mcErrors',
'mcParticleStatus',
466 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
467 hist_variables_2d = [(
'chiProb', channel.mvaConfig.target),
468 (
'chiProb',
'mcErrors'),
469 (
'chiProb',
'mcParticleStatus')]
470 for specVar
in channel.mvaConfig.spectators:
471 hist_variables_2d.append((
'chiProb', specVar))
472 hist_variables_2d.append((channel.mvaConfig.target, specVar))
473 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_AfterVertex.root')
474 ma.variablesToHistogram(
476 variables=config.variables2binnings(hist_variables),
477 variables_2d=config.variables2binnings_2d(hist_variables_2d),
479 ignoreCommandLineOverride=
True,
480 directory=f
'{channel.label}',
488 Steers the reconstruction phase after the mva method was applied
490 - The application of the mva method itself.
491 - Copying all channel lists in a common one for each particle defined in particles
492 - Tag unique signal candidates, to avoid double counting of channels with overlap
497 Create a new PostReconstruction object
498 @param particles list of config.Particle objects
499 @param config config.FeiConfiguration object
508 Returns all channels for which the weightfile is missing
512 for channel
in particle.channels:
514 weightfile = f
'{channel.label}.xml'
515 if not basf2_mva.available(weightfile):
516 missing += [channel.label]
521 Check if the relevant information is already available
527 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
530 path = basf2.create_path()
533 for channel
in particle.channels:
534 expert = basf2.register_module(
'MVAExpert')
535 expert.set_name(f
'MVAExpert_{channel.name}')
537 expert.param(
'identifier', f
'{channel.label}.xml')
539 expert.param(
'identifier', f
'{self.config.prefix}_{channel.label}')
540 expert.param(
'extraInfoName',
'SignalProbability')
541 expert.param(
'listNames', [channel.name])
543 expert.set_log_level(basf2.logging.log_level.ERROR)
544 path.add_module(expert)
547 if self.
config.monitor ==
'simple':
548 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
549 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
551 hist_variables = [
'mcErrors',
553 'extraInfo(SignalProbability)',
554 channel.mvaConfig.target,
555 'extraInfo(decayModeID)'] + list(channel.mvaConfig.spectators.keys())
556 hist_variables_2d = [(
'extraInfo(SignalProbability)', channel.mvaConfig.target),
557 (
'extraInfo(SignalProbability)',
'mcErrors'),
558 (
'extraInfo(SignalProbability)',
'mcParticleStatus'),
559 (
'extraInfo(decayModeID)', channel.mvaConfig.target),
560 (
'extraInfo(decayModeID)',
'mcErrors'),
561 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
562 for specVar
in channel.mvaConfig.spectators:
563 hist_variables_2d.append((
'extraInfo(SignalProbability)', specVar))
564 hist_variables_2d.append((
'extraInfo(decayModeID)', specVar))
565 hist_variables_2d.append((channel.mvaConfig.target, specVar))
566 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_AfterMVA.root')
567 ma.variablesToHistogram(
569 variables=config.variables2binnings(hist_variables),
570 variables_2d=config.variables2binnings_2d(hist_variables_2d),
572 ignoreCommandLineOverride=
True,
573 directory=f
'{channel.label}',
577 if particle.postCutConfig.value > 0.0:
578 cutstring = f
'{particle.postCutConfig.value} < extraInfo(SignalProbability)'
580 ma.mergeListsWithBestDuplicate(particle.identifier, [c.name
for c
in particle.channels],
581 variable=
'particleSource', writeOut=
True, path=path)
584 if self.
config.monitor ==
'simple':
585 hist_variables = [particle.mvaConfig.target,
'extraInfo(decayModeID)']
586 hist_variables_2d = [(particle.mvaConfig.target,
'extraInfo(decayModeID)')]
588 hist_variables = [
'mcErrors',
590 'extraInfo(SignalProbability)',
591 particle.mvaConfig.target,
592 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
593 hist_variables_2d = [(
'extraInfo(decayModeID)', particle.mvaConfig.target),
594 (
'extraInfo(decayModeID)',
'mcErrors'),
595 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
596 for specVar
in particle.mvaConfig.spectators:
597 hist_variables_2d.append((
'extraInfo(SignalProbability)', specVar))
598 hist_variables_2d.append((
'extraInfo(decayModeID)', specVar))
599 hist_variables_2d.append((particle.mvaConfig.target, specVar))
600 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_BeforePostCut.root')
601 ma.variablesToHistogram(
603 variables=config.variables2binnings(hist_variables),
604 variables_2d=config.variables2binnings_2d(hist_variables_2d),
606 ignoreCommandLineOverride=
True,
607 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
610 ma.applyCuts(particle.identifier, cutstring, path=path)
613 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_BeforeRanking.root')
614 ma.variablesToHistogram(
616 variables=config.variables2binnings(hist_variables),
617 variables_2d=config.variables2binnings_2d(hist_variables_2d),
619 ignoreCommandLineOverride=
True,
620 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
623 ma.rankByHighest(particle.identifier,
'extraInfo(SignalProbability)',
624 particle.postCutConfig.bestCandidateCut,
'postCut_rank', path=path)
626 uniqueSignal = basf2.register_module(
'TagUniqueSignal')
627 uniqueSignal.param(
'particleList', particle.identifier)
628 uniqueSignal.param(
'target', particle.mvaConfig.target)
629 uniqueSignal.param(
'extraInfoName',
'uniqueSignal')
630 uniqueSignal.set_name(f
'TagUniqueSignal_{particle.identifier}')
632 uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
633 path.add_module(uniqueSignal)
636 if self.
config.monitor !=
'simple':
637 hist_variables += [
'extraInfo(postCut_rank)']
638 hist_variables_2d += [(
'extraInfo(decayModeID)',
'extraInfo(postCut_rank)'),
639 (particle.mvaConfig.target,
'extraInfo(postCut_rank)'),
640 (
'mcErrors',
'extraInfo(postCut_rank)'),
641 (
'mcParticleStatus',
'extraInfo(postCut_rank)')]
642 for specVar
in particle.mvaConfig.spectators:
643 hist_variables_2d.append((
'extraInfo(postCut_rank)', specVar))
644 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_AfterRanking.root')
645 ma.variablesToHistogram(
647 variables=config.variables2binnings(hist_variables),
648 variables_2d=config.variables2binnings_2d(hist_variables_2d),
650 ignoreCommandLineOverride=
True,
651 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
654 filename = os.path.join(self.
config.monitoring_path,
'Monitor_Final.root')
655 if self.
config.monitor ==
'simple':
656 hist_variables = [
'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)']
657 hist_variables_2d = [(
'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)')]
658 ma.variablesToHistogram(
660 variables=config.variables2binnings(hist_variables),
661 variables_2d=config.variables2binnings_2d(hist_variables_2d),
663 ignoreCommandLineOverride=
True,
664 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
667 variables = [
'extraInfo(SignalProbability)',
'mcErrors',
'mcParticleStatus', particle.mvaConfig.target,
668 'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
670 ma.variablesToNtuple(
673 treename=ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(
674 config.removeJPsiSlash(f
'{particle.identifier} variables')),
676 ignoreCommandLineOverride=
True,
683 Performs all necessary trainings for all training data files which are
684 available but where there is no weight file available yet.
685 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
686 The trainings are run in parallel using multi-threading of python.
687 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
688 * basf2_mva_teacher, the training will be done directly on the machine
689 * externClustTeacher, the training will be submitted to the batch system of KEKCC
693 MaximumNumberOfMVASamples = int(1e7)
696 MinimumNumberOfMVASamples = int(5e2)
700 Create a new Teacher object
701 @param particles list of config.Particle objects
702 @param config config.FeiConfiguration object
712 Create a fake weight file using the trivial method, it will always return 0.0
713 @param channel for which we create a fake weight file
716 <?xml version="1.0" encoding="utf-8"?>
717 <method>Trivial</method>
718 <weightfile>{channel}.xml</weightfile>
719 <treename>tree</treename>
720 <target_variable>isSignal</target_variable>
721 <weight_variable>__weight__</weight_variable>
722 <signal_class>1</signal_class>
723 <max_events>0</max_events>
724 <number_feature_variables>1</number_feature_variables>
725 <variable0>M</variable0>
726 <number_spectator_variables>0</number_spectator_variables>
727 <number_data_files>1</number_data_files>
728 <datafile0>train.root</datafile0>
729 <Trivial_version>1</Trivial_version>
730 <Trivial_output>0</Trivial_output>
731 <signal_fraction>0.066082567</signal_fraction>
733 with open(f
'{channel}.xml',
"w")
as f:
739 Checks if the provided filename is a fake-weight file or not
740 @param filename the filename of the weight file
743 return '<method>Trivial</method>' in open(filename).readlines()[2]
744 except BaseException:
750 Upload the weight file into the condition database
751 @param channel whose weight file is uploaded
753 disk = f
'{channel}.xml'
754 dbase = f
'{self.config.prefix}_{channel}'
755 basf2_mva.upload(disk, dbase)
756 print(f
"FEI-core: Uploading {dbase} to localdb")
761 Do all trainings for which we find training data
767 ROOT.PyConfig.StartGuiThread =
False
770 all_stage_particles = get_stages_from_particles(self.
particles)
771 if self.
config.cache
is None:
772 stagesToTrain = range(1, len(all_stage_particles)+1)
774 stagesToTrain = [self.
config.cache]
776 filename =
'training_input.root'
777 if os.path.isfile(filename):
778 f = ROOT.TFile.Open(filename,
'read')
780 B2WARNING(f
'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
781 elif len([k.GetName()
for k
in f.GetListOfKeys()]) == 0:
783 f
'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
785 for istage
in stagesToTrain:
786 for particle
in all_stage_particles[istage-1]:
787 for channel
in particle.channels:
788 weightfile = f
'{channel.label}.xml'
789 if basf2_mva.available(weightfile):
790 B2INFO(f
"FEI-core: Skipping {weightfile}, already available")
793 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables')
794 keys = [m
for m
in f.GetListOfKeys()
if treeName
in m.GetName()]
796 B2WARNING(
"Training of MVC failed. "
797 f
"Couldn't find tree for channel {channel}. Ignoring channel.")
800 B2WARNING(f
"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
801 tree = keys[0].ReadObj()
802 total_entries = tree.GetEntries()
803 nSig = tree.GetEntries(f
'{channel.mvaConfig.target}==1.0')
804 nBg = tree.GetEntries(f
'{channel.mvaConfig.target}==0.0')
806 f
'FEI-core: Number of events for channel: {channel.label}, '
807 f
'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
808 if nSig < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING(
"Training of MVC failed. "
810 f
"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
812 self.
upload(channel.label)
814 if nBg < Teacher.MinimumNumberOfMVASamples:
815 B2WARNING(
"Training of MVC failed. "
816 f
"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
818 self.
upload(channel.label)
820 variable_str =
"' '".join(channel.mvaConfig.variables)
822 spectators = list(channel.mvaConfig.spectators.keys())
823 if channel.mvaConfig.sPlotVariable
is not None:
824 spectators.append(channel.mvaConfig.sPlotVariable)
825 spectators_str =
"' '".join(spectators)
827 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables')
828 command = (f
"{self.config.externTeacher}"
829 f
" --method '{channel.mvaConfig.method}'"
830 f
" --target_variable '{channel.mvaConfig.target}'"
831 f
" --treename '{treeName}'"
832 f
" --datafile 'training_input.root'"
834 f
" --variables '{variable_str}'"
835 f
" --identifier '{weightfile}'")
836 if len(spectators) > 0:
837 command += f
" --spectators '{spectators_str}'"
838 command += f
" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
839 B2INFO(f
"Used following command to invoke teacher: \n {command}")
840 job_list.append((channel.label, command))
843 if len(job_list) > 0:
844 p = multiprocessing.Pool(
None, maxtasksperchild=1)
845 func = functools.partial(subprocess.call, shell=
True)
846 p.map(func, [c
for _, c
in job_list])
850 for name, _
in job_list:
851 if not basf2_mva.available(f
'{name}.xml'):
852 B2WARNING(
"Training of MVC failed. For unknown reasons, check the logfile", f
'{name}.log')
854 weightfiles.append(self.
upload(name))
858def convert_legacy_training(particles: typing.Sequence[
config.Particle], configuration: config.FeiConfiguration):
860 Convert an old FEI training into the new format.
861 The old format used hashes for the weight files, the hashes can be converted to the new naming scheme
862 using the Summary.pickle file outputted by the FEIv3. This file must be passes by the parameter configuration.legacy.
863 @param particles list of config.Particle objects
864 @param config config.FeiConfiguration object
866 summary = pickle.load(open(configuration.legacy,
'rb'))
867 channel2lists = {k: v[2]
for k, v
in summary[
'channel2lists'].items()}
869 teacher =
Teacher(particles, configuration)
871 for particle
in particles:
872 for channel
in particle.channels:
873 new_weightfile = f
'{configuration.prefix}_{channel.label}'
874 old_weightfile = f
'{configuration.prefix}_{channel2lists[channel.label.replace("Jpsi", "J/psi")]}'
875 if not basf2_mva.available(new_weightfile):
876 if old_weightfile
is None or not basf2_mva.available(old_weightfile):
877 Teacher.create_fake_weightfile(channel.label)
878 teacher.upload(channel.label)
880 basf2_mva.download(old_weightfile, f
'{channel.label}.xml')
881 teacher.upload(channel.label)
884def get_stages_from_particles(particles: typing.Sequence[typing.Union[
config.Particle, str]]):
886 Returns the hierarchical structure of the FEI.
887 Each stage depends on the particles in the previous stage.
888 The final stage is empty (meaning everything is done, and the training is finished at this point).
889 @param particles list of config.Particle or string objects
892 return p.split(
":")[0]
if isinstance(p, str)
else p.name
895 return (p.split(
":")[1]
if isinstance(p, str)
else p.label).lower()
898 [p
for p
in particles
if get_pname(p)
in [
'e+',
'K+',
'pi+',
'mu+',
'gamma',
'p+',
'K_L0']],
899 [p
for p
in particles
if get_pname(p)
in [
'pi0',
'J/psi',
'Lambda0']],
900 [p
for p
in particles
if get_pname(p)
in [
'K_S0',
'Sigma+']],
901 [p
for p
in particles
if get_pname(p)
in [
'D+',
'D0',
'D_s+',
'Lambda_c+']
and 'tag' not in get_plabel(p)],
902 [p
for p
in particles
if get_pname(p)
in [
'D*+',
'D*0',
'D_s*+']
and 'tag' not in get_plabel(p)],
903 [p
for p
in particles
if get_pname(p)
in [
'B0',
'B+',
'B_s0']
or 'tag' in get_plabel(p)],
909 if pname
not in [pname
for stage
in stages
for p
in stage]:
910 raise RuntimeError(f
"Unknown particle {pname}: Not implemented in FEI")
915def do_trainings(particles: typing.Sequence[
config.Particle], configuration: config.FeiConfiguration):
917 Performs the training of mva classifiers for all available training data,
918 this function must be either called by the user after each stage of the FEI during training,
919 or (more likely) is called by the distributed.py script after merging the outputs of all jobs,
920 @param particles list of config.Particle objects
921 @param config config.FeiConfiguration object
922 @return list of tuple with weight file on disk and identifier in database for all trained classifiers
924 teacher =
Teacher(particles, configuration)
925 return teacher.do_all_trainings()
929 configuration: config.FeiConfiguration,
931 roundMode: int =
None,
932 pickleName: str =
'Summary.pickle'):
934 Creates the Summary.pickle, which is used to keep track of the stage during the training,
935 and can be used later to investigate which configuration was used exactly to create the training.
936 @param particles list of config.Particle objects
937 @param config config.FeiConfiguration object
938 @param cache current cache level
939 @param roundMode mode of current round of training
940 @param pickleName name of the pickle file
942 if roundMode
is None:
943 roundMode = configuration.roundMode
944 configuration = configuration._replace(cache=cache, roundMode=roundMode)
946 for i
in range(8, -1, -1):
947 if os.path.isfile(f
'{pickleName}.backup_{i}'):
948 shutil.copyfile(f
'{pickleName}.backup_{i}', f
'{pickleName}.backup_{i+1}')
949 if os.path.isfile(pickleName):
950 shutil.copyfile(pickleName, f
'{pickleName}.backup_0')
951 pickle.dump((particles, configuration), open(pickleName,
'wb'))
954def get_path(particles: typing.Sequence[
config.Particle], configuration: config.FeiConfiguration) -> FeiState:
956 The most important function of the FEI.
957 This creates the FEI path for training/fitting (both terms are equal), and application/inference (both terms are equal).
958 The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
959 and the configuration (see config.py).
962 For training this function is called multiple times, each time the FEI reconstructs one more stage in the hierarchical structure
963 i.e. we start with FSP, pi0, KS_0, D, D*, and with B mesons. You have to set configuration.training to True for training mode.
964 All weight files created during the training will be stored in your local database.
965 If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
966 to the central database first (see documentation for the Belle2 Condition Database).
969 For application you call this function once, and it returns the whole path which will reconstruct B mesons
970 with an associated signal probability. You have to set configuration.training to False for application mode.
973 You can always turn on the monitoring (configuration.monitor != False),
974 to write out ROOT Histograms of many quantities for each stage,
975 using these histograms you can use the printReporting.py or latexReporting.py scripts to automatically create pdf files.
978 This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
979 and the weight files will be automatically converted to the new naming scheme.
981 @param particles list of config.Particle objects
982 @param config config.FeiConfiguration object
985 ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
986 |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
987 | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|
989 Author: Thomas Keck 2014 - 2017
990 Please cite my PhD thesis
1001 if configuration.training
and (configuration.monitor
and (configuration.monitoring_path !=
'')):
1002 B2ERROR(
"FEI-core: Custom Monitoring path is not allowed during training!")
1004 if configuration.cache
is None:
1005 pickleName =
'Summary.pickle'
1006 if configuration.monitor:
1007 pickleName = os.path.join(configuration.monitoring_path, pickleName)
1009 if os.path.isfile(pickleName):
1010 particles_bkp, config_bkp = pickle.load(open(pickleName,
'rb'))
1012 for fd
in configuration._fields:
1013 if fd ==
'cache' or fd ==
'roundMode':
1015 if getattr(configuration, fd) != getattr(config_bkp, fd):
1017 f
"FEI-core: Configuration changed: {fd} from {getattr(config_bkp, fd)} to {getattr(configuration, fd)}")
1019 configuration = config_bkp
1020 cache = configuration.cache
1021 print(
"Cache: Replaced particles from steering and configuration from Summary.pickle: ", cache, configuration.roundMode)
1023 if configuration.training:
1028 cache = configuration.cache
1031 path = basf2.create_path()
1036 stages = get_stages_from_particles(particles)
1041 if configuration.legacy
is not None:
1042 convert_legacy_training(particles, configuration)
1051 if cache < 0
and configuration.training:
1052 print(
"Stage 0: Run over all files to count the number of events and McParticles")
1053 path.add_path(training_data_information.reconstruct())
1054 if configuration.training:
1055 save_summary(particles, configuration, 0)
1056 return FeiState(path, 0, [], [], [])
1057 elif not configuration.training
and configuration.monitor:
1058 path.add_path(training_data_information.reconstruct())
1064 loader =
FSPLoader(particles, configuration)
1066 print(
"Stage 0: Load FSP particles")
1067 path.add_path(loader.reconstruct())
1090 for stage, stage_particles
in enumerate(stages):
1091 if len(stage_particles) == 0:
1092 print(f
"Stage {stage}: No particles to reconstruct in this stage, skipping!")
1099 print(f
"Stage {stage}: PreReconstruct particles: ", [p.name
for p
in stage_particles])
1100 path.add_path(pre_reconstruction.reconstruct())
1101 if configuration.training
and not (post_reconstruction.available()
and configuration.roundMode == 0):
1102 print(f
"Stage {stage}: Create training data for particles: ", [p.name
for p
in stage_particles])
1103 mc_counts = training_data_information.get_mc_counts()
1104 training_data =
TrainingData(stage_particles, configuration, mc_counts)
1105 path.add_path(training_data.reconstruct())
1106 used_lists += [channel.name
for particle
in stage_particles
for channel
in particle.channels]
1109 used_lists += [particle.identifier
for particle
in stage_particles]
1110 if (stage >= cache - 1)
and not ((configuration.roundMode == 1)
and configuration.training):
1111 if (configuration.roundMode == 3)
and configuration.training:
1112 print(f
"Stage {stage}: BDTs already applied for particles, no postReco needed: ", [p.name
for p
in stage_particles])
1114 print(f
"Stage {stage}: Apply BDT for particles: ", [p.name
for p
in stage_particles])
1115 if configuration.training
and not post_reconstruction.available():
1116 raise RuntimeError(
"FEI-core: training of current stage was not successful, please retrain!")
1117 path.add_path(post_reconstruction.reconstruct())
1118 if (((configuration.roundMode == 2)
or (configuration.roundMode == 3))
and configuration.training):
1120 fsps_of_all_stages = [fsp
for sublist
in get_stages_from_particles(loader.get_fsp_lists())
for fsp
in sublist]
1123 if configuration.training
and (configuration.roundMode == 3):
1124 dontRemove = used_lists + fsps_of_all_stages
1126 cleanup = basf2.register_module(
'RemoveParticlesNotInLists')
1127 print(
"FEI-REtrain: pruning basf2_input.root of higher stages")
1128 cleanup.param(
'particleLists', dontRemove)
1129 path.add_module(cleanup)
1133 excludedParticlesNonConjugated = [p.identifier
for p
in particles
if p.identifier
not in dontRemove]
1134 excludedParticles = [
1135 str(name)
for name
in list(
1136 ROOT.Belle2.ParticleListName.addAntiParticleLists(excludedParticlesNonConjugated))]
1137 root_file = ROOT.TFile.Open(
'basf2_input.root',
"READ")
1138 tree = root_file.Get(
'tree')
1139 for branch
in tree.GetListOfBranches():
1140 branchName = branch.GetName()
1141 if any(exParticle
in branchName
for exParticle
in excludedParticles):
1142 excludelists.append(branchName)
1143 print(
"Exclude lists from output: ", excludelists)
1147 if configuration.monitor:
1148 print(
"Add ModuleStatistics")
1149 output = basf2.register_module(
'RootOutput')
1150 output.param(
'outputFileName', os.path.join(configuration.monitoring_path,
'Monitor_ModuleStatistics.root'))
1151 output.param(
'branchNames', [
'EventMetaData'])
1152 output.param(
'branchNamesPersistent', [
'ProcessStatistics'])
1153 output.param(
'ignoreCommandLineOverride',
True)
1154 path.add_module(output)
1158 if configuration.training
or configuration.monitor:
1159 print(
"Save Summary.pickle")
1160 save_summary(particles, configuration, stage+1, pickleName=os.path.join(configuration.monitoring_path,
'Summary.pickle'))
1163 return FeiState(path, stage+1, plists=used_lists, fsplists=fsps_of_all_stages, excludelists=excludelists)
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
typing.List[str] get_fsp_lists(self)
config
config.FeiConfiguration object
particles
list of config.Particle objects
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
typing.Sequence[str] get_missing_channels(self)
config
config.FeiConfiguration object
particles
list of config.Particle objects
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
config
config.FeiConfiguration object
particles
list of config.Particle objects
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
upload(self, str channel)
create_fake_weightfile(str channel)
config
config.FeiConfiguration object
particles
list of config.Particle objects
check_if_weightfile_is_fake(str filename)
pybasf2.Path reconstruct(self)
mc_counts
containing number of MC Particles
config
config.FeiConfiguration object
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config, typing.Mapping[int, typing.Mapping[str, float]] mc_counts)
particles
list of config.Particle objects