12 The Full Event Interpretation Algorithm
15 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
16 - It can be used for hadronic and semileptonic tagging.
17 - The algorithm has to be trained on MC, and can afterwards be applied on data.
18 - The training requires O(100) million MC events
19 - The weight files are stored in the Belle II Condition database
21 Read this file if you want to understand the technical details of the FEI.
23 The FEI follows a hierarchical approach.
25 (Stage -1: Write out information about the provided data sample)
26 Stage 0: Final State Particles (FSP)
27 Stage 1: pi0, J/Psi, Lambda0
29 Stage 3: D and Lambda_c mesons
34 Most stages consist of:
35 - Create Particle Candidates
38 - Apply a multivariate classification method
41 The FEI will reconstruct these 7 stages during the training phase.
42 Since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
43 to train all the necessary multivariate classifiers.
48from basf2
import B2INFO, B2WARNING, B2ERROR
50import modularAnalysis
as ma
# Immutable record handed back by get_path (see its return annotation).
# NOTE(review): the meaning of the individual fields is not visible here --
# presumably the basf2 path, the current stage and the particle lists; confirm against get_path.
FeiState = collections.namedtuple(
    'FeiState',
    ('path', 'stage', 'plists', 'fsplists', 'excludelists'),
)
75 Contains the relevant information about the used training data.
76 Basically we write out the number of MC particles in the whole dataset.
77 We can use these numbers to calculate what fraction of candidates we have to write
78 out as TrainingData to get a reasonable amount of candidates to train on
79 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
80 Secondly we can use this information for the generation of the monitoring pdfs,
81 where we calculate reconstruction efficiencies.
86 Create a new TrainingData object
87 @param particles list of config.Particle objects
88 @param outputPath path to the output directory
93 self.
filename = os.path.join(outputPath,
'mcParticlesCount.root')
97 Check if the relevant information is already available
103 Returns pybasf2.Path which counts the number of MCParticles in each event.
104 @param particles list of config.Particle objects
109 path = basf2.create_path()
110 module = basf2.register_module(
'VariablesToHistogram')
111 module.set_name(
"VariablesToHistogram_MCCount")
112 module.param(
'variables', [(f
'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5)
for pdg
in pdgs])
113 module.param(
'fileName', self.
filename)
114 module.param(
'ignoreCommandLineOverride',
True)
115 path.add_module(module)
120 Read out the number of MC particles from the file created by reconstruct
125 root_file = ROOT.TFile.Open(self.
filename,
'read')
128 for key
in root_file.GetListOfKeys():
129 variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
130 pdg = abs(int(variable[len(
'NumberOfMCParticlesInEvent('):-len(
")")]))
133 mc_counts[pdg][
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
134 for bin
in range(hist.GetNbinsX()))
135 mc_counts[pdg][
'std'] = hist.GetStdDev()
136 mc_counts[pdg][
'avg'] = hist.GetMean()
137 mc_counts[pdg][
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
138 mc_counts[pdg][
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
141 mc_counts[0][
'sum'] = hist.GetEntries()
148 Steers the loading of FSP particles.
149 This does NOT include RootInput, Geometry or anything required before loading FSPs,
150 the user has to add these themselves (because it depends on the MC campaign and on whether you want
151 to use Belle or Belle II).
156 Create a new FSPLoader object
157 @param particles list of config.Particle objects
158 @param config config.FeiConfiguration object
167 Returns a list of FSP particle lists which are used in the FEI.
168 This is used to create the RootOutput module.
170 fsps = [
'K+:FSP',
'pi+:FSP',
'e+:FSP',
'mu+:FSP',
'p+:FSP',
'gamma:FSP',
'K_S0:V0',
'Lambda0:V0',
'K_L0:FSP',
'gamma:V0']
177 Returns pybasf2.Path which loads the FSP Particles
179 path = basf2.create_path()
182 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
183 (
'mu+:FSP',
''), (
'p+:FSP',
'')], writeOut=
True, path=path)
184 for outputList, inputList
in [(
'gamma:FSP',
'gamma:mdst'), (
'K_S0:V0',
'K_S0:mdst'),
185 (
'Lambda0:V0',
'Lambda0:mdst'), (
'K_L0:FSP',
'K_L0:mdst'),
186 (
'pi0:FSP',
'pi0:mdst'), (
'gamma:V0',
'gamma:v0mdst')]:
187 ma.copyParticles(outputList, inputList, writeOut=
True, path=path)
189 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
190 (
'mu+:FSP',
''), (
'gamma:FSP',
''),
191 (
'p+:FSP',
''), (
'K_L0:FSP',
'')], writeOut=
True, path=path)
192 ma.fillParticleList(
'K_S0:V0 -> pi+ pi-',
'', writeOut=
True, path=path)
193 ma.fillParticleList(
'Lambda0:V0 -> p+ pi-',
'', writeOut=
True, path=path)
194 ma.fillConvertedPhotonsList(
'gamma:V0 -> e+ e-',
'', writeOut=
True, path=path)
197 names = [
'e+',
'K+',
'pi+',
'mu+',
'gamma',
'K_S0',
'p+',
'K_L0',
'Lambda0',
'pi0']
198 filename = os.path.join(self.
config.monitoring_path,
'Monitor_FSPLoader.root')
200 variables = [(f
'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5)
for pdg
in pdgs]
201 ma.variablesToHistogram(
'', variables=variables, filename=filename, ignoreCommandLineOverride=
True, path=path)
207 Steers the creation of the training data.
208 The training data is used to train a multivariate classifier for each channel.
209 The training of the FEI at its core is just generating this training data for each channel.
210 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
214 mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
216 Create a new TrainingData object
217 @param particles list of config.Particle objects
218 @param config config.FeiConfiguration object
219 @param mc_counts containing number of MC Particles
230 Returns pybasf2.Path which creates the training data for the given particles
233 path = basf2.create_path()
238 print(f
"FEI-core: TrainingData: nSignal for {particle.name}: {nSignal}")
247 for channel
in particle.channels:
248 weightfile = f
'{channel.label}.xml'
249 if basf2_mva.available(weightfile):
250 B2INFO(f
"FEI-core: Skipping preparing Training Data for {weightfile}, already available")
252 filename =
'training_input.root'
255 nBackground = self.
mc_counts[0][
'sum'] * channel.preCutConfig.bestCandidateCut
256 inverseSamplingRates = {}
259 if nBackground > Teacher.MaximumNumberOfMVASamples
and not channel.preCutConfig.noBackgroundSampling:
260 inverseSamplingRates[0] = max(
261 1, int((int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1) * channel.preCutConfig.bkgSamplingFactor))
262 elif channel.preCutConfig.bkgSamplingFactor > 1:
263 inverseSamplingRates[0] = int(channel.preCutConfig.bkgSamplingFactor)
265 if nSignal > Teacher.MaximumNumberOfMVASamples
and not channel.preCutConfig.noSignalSampling:
266 inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1
268 spectators = [channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
269 if channel.mvaConfig.sPlotVariable
is not None:
270 spectators.append(channel.mvaConfig.sPlotVariable)
273 hist_variables = [
'mcErrors',
'mcParticleStatus'] + channel.mvaConfig.variables + spectators
274 hist_variables_2d = [(x, channel.mvaConfig.target)
275 for x
in channel.mvaConfig.variables + spectators
if x
is not channel.mvaConfig.target]
276 hist_filename = os.path.join(self.
config.monitoring_path,
'Monitor_TrainingData.root')
277 ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
278 variables_2d=config.variables2binnings_2d(hist_variables_2d),
279 filename=hist_filename,
280 ignoreCommandLineOverride=
True,
281 directory=config.removeJPsiSlash(f
'{channel.label}'), path=path)
283 teacher = basf2.register_module(
'VariablesToNtuple')
284 teacher.set_name(f
'VariablesToNtuple_{channel.name}')
285 teacher.param(
'fileName', filename)
286 teacher.param(
'treeName', ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables'))
287 teacher.param(
'variables', channel.mvaConfig.variables + spectators)
288 teacher.param(
'particleList', channel.name)
289 teacher.param(
'sampling', (channel.mvaConfig.target, inverseSamplingRates))
290 teacher.param(
'ignoreCommandLineOverride',
True)
291 path.add_module(teacher)
297 Steers the reconstruction phase before the mva method was applied
299 - The ParticleCombination (for each particle and channel we create candidates using
300 the daughter candidates from the previous stages)
302 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
303 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
304 this will speed up the whole FEI by a factor 2-3)
309 Create a new PreReconstruction object
310 @param particles list of config.Particle objects
311 @param config config.FeiConfiguration object
320 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
322 path = basf2.create_path()
325 for channel
in particle.channels:
328 channel.daughters[0].split(
':')[0]) ==
pdg.from_name(particle.name)):
329 ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=
True, path=path)
330 v2EI = basf2.register_module(
'VariablesToExtraInfo')
331 v2EI.set_name(f
'VariablesToExtraInfo_{channel.name}')
332 v2EI.param(
'particleList', channel.name)
333 v2EI.param(
'variables', {f
'constant({channel.decayModeID})':
'decayModeID'})
335 v2EI.set_log_level(basf2.logging.log_level.ERROR)
336 path.add_module(v2EI)
338 ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
339 writeOut=
True, path=path)
341 ma.matchMCTruth(channel.name, path=path)
342 bc_variable = channel.preCutConfig.bestCandidateVariable
343 if self.
config.monitor ==
'simple':
344 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
345 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
347 hist_variables = [bc_variable,
'mcErrors',
'mcParticleStatus',
348 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
349 hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
350 (bc_variable,
'mcErrors'),
351 (bc_variable,
'mcParticleStatus')]
352 for specVar
in channel.mvaConfig.spectators:
353 hist_variables_2d.append((bc_variable, specVar))
354 hist_variables_2d.append((channel.mvaConfig.target, specVar))
355 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_BeforeRanking.root')
356 ma.variablesToHistogram(
358 variables=config.variables2binnings(hist_variables),
359 variables_2d=config.variables2binnings_2d(hist_variables_2d),
361 ignoreCommandLineOverride=
True,
362 directory=f
'{channel.label}',
365 if channel.preCutConfig.bestCandidateMode ==
'lowest':
366 ma.rankByLowest(channel.name,
367 channel.preCutConfig.bestCandidateVariable,
368 channel.preCutConfig.bestCandidateCut,
371 elif channel.preCutConfig.bestCandidateMode ==
'highest':
372 ma.rankByHighest(channel.name,
373 channel.preCutConfig.bestCandidateVariable,
374 channel.preCutConfig.bestCandidateCut,
378 raise RuntimeError(f
'Unknown bestCandidateMode {repr(channel.preCutConfig.bestCandidateMode)}')
380 if 'gamma' in channel.decayString
and channel.pi0veto:
381 ma.buildRestOfEvent(channel.name, path=path)
382 Ddaughter_roe_path = basf2.Path()
383 deadEndPath = basf2.Path()
384 ma.signalSideParticleFilter(channel.name,
'', Ddaughter_roe_path, deadEndPath)
385 ma.fillParticleList(
'gamma:roe',
'isInRestOfEvent == 1', path=Ddaughter_roe_path)
387 matches = list(re.finditer(
'gamma', channel.decayString))
389 for igamma
in range(len(matches)):
390 start, end = matches[igamma-1].span()
391 tempString = f
'{channel.decayString[:start]}^gamma{channel.decayString[end:]}'
392 ma.fillSignalSideParticleList(f
'gamma:sig_{igamma}', tempString, path=Ddaughter_roe_path)
393 ma.reconstructDecay(f
'pi0:veto_{igamma} -> gamma:sig_{igamma} gamma:roe',
'', path=Ddaughter_roe_path)
394 pi0lists.append(f
'pi0:veto_{igamma}')
395 ma.copyLists(
'pi0:veto', pi0lists, writeOut=
False, path=Ddaughter_roe_path)
396 ma.rankByLowest(
'pi0:veto',
'abs(dM)', 1, path=Ddaughter_roe_path)
397 ma.matchMCTruth(
'pi0:veto', path=Ddaughter_roe_path)
398 ma.variableToSignalSideExtraInfo(
401 'InvM':
'pi0vetoMass',
402 'formula((daughter(0,E)-daughter(1,E))/(daughter(0,E)+daughter(1,E)))':
'pi0vetoEnergyAsymmetry',
404 path=Ddaughter_roe_path
406 path.for_each(
'RestOfEvent',
'RestOfEvents', Ddaughter_roe_path)
409 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_AfterRanking.root')
410 if self.
config.monitor !=
'simple':
411 hist_variables += [
'extraInfo(preCut_rank)']
412 hist_variables_2d += [(
'extraInfo(preCut_rank)', channel.mvaConfig.target),
413 (
'extraInfo(preCut_rank)',
'mcErrors'),
414 (
'extraInfo(preCut_rank)',
'mcParticleStatus')]
415 for specVar
in channel.mvaConfig.spectators:
416 hist_variables_2d.append((
'extraInfo(preCut_rank)', specVar))
417 ma.variablesToHistogram(
419 variables=config.variables2binnings(hist_variables),
420 variables_2d=config.variables2binnings_2d(hist_variables_2d),
422 ignoreCommandLineOverride=
True,
423 directory=f
'{channel.label}',
427 elif self.
config.training:
428 ma.matchMCTruth(channel.name, path=path)
431 pvfit = basf2.register_module(
'ParticleVertexFitter')
432 pvfit.set_name(f
'ParticleVertexFitter_{channel.name}')
433 pvfit.param(
'listName', channel.name)
434 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
435 pvfit.param(
'vertexFitter',
'KFit')
436 pvfit.param(
'fitType',
'vertex')
437 pvfit.set_log_level(basf2.logging.log_level.ERROR)
438 path.add_module(pvfit)
439 elif re.findall(
r"[\w']+", channel.decayString).count(
'pi0') > 1
and particle.name !=
'pi0':
440 basf2.B2INFO(f
"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
441 elif len(channel.daughters) > 1:
442 pvfit = basf2.register_module(
'ParticleVertexFitter')
443 pvfit.set_name(f
'ParticleVertexFitter_{channel.name}')
444 pvfit.param(
'listName', channel.name)
445 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
446 pvfit.param(
'vertexFitter',
'KFit')
447 if particle.name
in [
'pi0']:
448 pvfit.param(
'fitType',
'mass')
450 pvfit.param(
'fitType',
'vertex')
451 pvfit.set_log_level(basf2.logging.log_level.ERROR)
452 path.add_module(pvfit)
455 if self.
config.monitor ==
'simple':
456 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
457 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
459 hist_variables = [
'chiProb',
'mcErrors',
'mcParticleStatus',
460 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
461 hist_variables_2d = [(
'chiProb', channel.mvaConfig.target),
462 (
'chiProb',
'mcErrors'),
463 (
'chiProb',
'mcParticleStatus')]
464 for specVar
in channel.mvaConfig.spectators:
465 hist_variables_2d.append((
'chiProb', specVar))
466 hist_variables_2d.append((channel.mvaConfig.target, specVar))
467 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PreReconstruction_AfterVertex.root')
468 ma.variablesToHistogram(
470 variables=config.variables2binnings(hist_variables),
471 variables_2d=config.variables2binnings_2d(hist_variables_2d),
473 ignoreCommandLineOverride=
True,
474 directory=f
'{channel.label}',
482 Steers the reconstruction phase after the mva method was applied
484 - The application of the mva method itself.
485 - Copying all channel lists in a common one for each particle defined in particles
486 - Tag unique signal candidates, to avoid double counting of channels with overlap
491 Create a new PostReconstruction object
492 @param particles list of config.Particle objects
493 @param config config.FeiConfiguration object
502 Returns all channels for which the weightfile is missing
506 for channel
in particle.channels:
508 weightfile = f
'{channel.label}.xml'
509 if not basf2_mva.available(weightfile):
510 missing += [channel.label]
515 Check if the relevant information is already available
521 Returns pybasf2.Path which applies the mva method to each channel, merges the channel lists per particle, and applies the post-cut, ranking and unique-signal tagging
524 path = basf2.create_path()
527 for channel
in particle.channels:
528 expert = basf2.register_module(
'MVAExpert')
529 expert.set_name(f
'MVAExpert_{channel.name}')
531 expert.param(
'identifier', f
'{channel.label}.xml')
533 expert.param(
'identifier', f
'{self.config.prefix}_{channel.label}')
534 expert.param(
'extraInfoName',
'SignalProbability')
535 expert.param(
'listNames', [channel.name])
537 expert.set_log_level(basf2.logging.log_level.ERROR)
538 path.add_module(expert)
541 if self.
config.monitor ==
'simple':
542 hist_variables = [channel.mvaConfig.target,
'extraInfo(decayModeID)']
543 hist_variables_2d = [(channel.mvaConfig.target,
'extraInfo(decayModeID)')]
545 hist_variables = [
'mcErrors',
547 'extraInfo(SignalProbability)',
548 channel.mvaConfig.target,
549 'extraInfo(decayModeID)'] + list(channel.mvaConfig.spectators.keys())
550 hist_variables_2d = [(
'extraInfo(SignalProbability)', channel.mvaConfig.target),
551 (
'extraInfo(SignalProbability)',
'mcErrors'),
552 (
'extraInfo(SignalProbability)',
'mcParticleStatus'),
553 (
'extraInfo(decayModeID)', channel.mvaConfig.target),
554 (
'extraInfo(decayModeID)',
'mcErrors'),
555 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
556 for specVar
in channel.mvaConfig.spectators:
557 hist_variables_2d.append((
'extraInfo(SignalProbability)', specVar))
558 hist_variables_2d.append((
'extraInfo(decayModeID)', specVar))
559 hist_variables_2d.append((channel.mvaConfig.target, specVar))
560 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_AfterMVA.root')
561 ma.variablesToHistogram(
563 variables=config.variables2binnings(hist_variables),
564 variables_2d=config.variables2binnings_2d(hist_variables_2d),
566 ignoreCommandLineOverride=
True,
567 directory=f
'{channel.label}',
571 if particle.postCutConfig.value > 0.0:
572 cutstring = f
'{particle.postCutConfig.value} < extraInfo(SignalProbability)'
574 ma.mergeListsWithBestDuplicate(particle.identifier, [c.name
for c
in particle.channels],
575 variable=
'particleSource', writeOut=
True, path=path)
578 if self.
config.monitor ==
'simple':
579 hist_variables = [particle.mvaConfig.target,
'extraInfo(decayModeID)']
580 hist_variables_2d = [(particle.mvaConfig.target,
'extraInfo(decayModeID)')]
582 hist_variables = [
'mcErrors',
584 'extraInfo(SignalProbability)',
585 particle.mvaConfig.target,
586 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
587 hist_variables_2d = [(
'extraInfo(decayModeID)', particle.mvaConfig.target),
588 (
'extraInfo(decayModeID)',
'mcErrors'),
589 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
590 for specVar
in particle.mvaConfig.spectators:
591 hist_variables_2d.append((
'extraInfo(SignalProbability)', specVar))
592 hist_variables_2d.append((
'extraInfo(decayModeID)', specVar))
593 hist_variables_2d.append((particle.mvaConfig.target, specVar))
594 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_BeforePostCut.root')
595 ma.variablesToHistogram(
597 variables=config.variables2binnings(hist_variables),
598 variables_2d=config.variables2binnings_2d(hist_variables_2d),
600 ignoreCommandLineOverride=
True,
601 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
604 ma.applyCuts(particle.identifier, cutstring, path=path)
607 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_BeforeRanking.root')
608 ma.variablesToHistogram(
610 variables=config.variables2binnings(hist_variables),
611 variables_2d=config.variables2binnings_2d(hist_variables_2d),
613 ignoreCommandLineOverride=
True,
614 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
617 ma.rankByHighest(particle.identifier,
'extraInfo(SignalProbability)',
618 particle.postCutConfig.bestCandidateCut,
'postCut_rank', path=path)
620 uniqueSignal = basf2.register_module(
'TagUniqueSignal')
621 uniqueSignal.param(
'particleList', particle.identifier)
622 uniqueSignal.param(
'target', particle.mvaConfig.target)
623 uniqueSignal.param(
'extraInfoName',
'uniqueSignal')
624 uniqueSignal.set_name(f
'TagUniqueSignal_{particle.identifier}')
626 uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
627 path.add_module(uniqueSignal)
630 if self.
config.monitor !=
'simple':
631 hist_variables += [
'extraInfo(postCut_rank)']
632 hist_variables_2d += [(
'extraInfo(decayModeID)',
'extraInfo(postCut_rank)'),
633 (particle.mvaConfig.target,
'extraInfo(postCut_rank)'),
634 (
'mcErrors',
'extraInfo(postCut_rank)'),
635 (
'mcParticleStatus',
'extraInfo(postCut_rank)')]
636 for specVar
in particle.mvaConfig.spectators:
637 hist_variables_2d.append((
'extraInfo(postCut_rank)', specVar))
638 filename = os.path.join(self.
config.monitoring_path,
'Monitor_PostReconstruction_AfterRanking.root')
639 ma.variablesToHistogram(
641 variables=config.variables2binnings(hist_variables),
642 variables_2d=config.variables2binnings_2d(hist_variables_2d),
644 ignoreCommandLineOverride=
True,
645 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
648 filename = os.path.join(self.
config.monitoring_path,
'Monitor_Final.root')
649 if self.
config.monitor ==
'simple':
650 hist_variables = [
'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)']
651 hist_variables_2d = [(
'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)')]
652 ma.variablesToHistogram(
654 variables=config.variables2binnings(hist_variables),
655 variables_2d=config.variables2binnings_2d(hist_variables_2d),
657 ignoreCommandLineOverride=
True,
658 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
661 variables = [
'extraInfo(SignalProbability)',
'mcErrors',
'mcParticleStatus', particle.mvaConfig.target,
662 'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
664 ma.variablesToNtuple(
667 treename=ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(
668 config.removeJPsiSlash(f
'{particle.identifier} variables')),
670 ignoreCommandLineOverride=
True,
677 Performs all necessary trainings for all training data files which are
678 available but where there is no weight file available yet.
679 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
680 The trainings are run in parallel using a process pool from python's multiprocessing module.
681 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
682 * basf2_mva_teacher, the training will be done directly on the machine
683 * externClustTeacher, the training will be submitted to the batch system of KEKCC
687 MaximumNumberOfMVASamples = int(1e7)
690 MinimumNumberOfMVASamples = int(5e2)
694 Create a new Teacher object
695 @param particles list of config.Particle objects
696 @param config config.FeiConfiguration object
706 Create a fake weight file using the trivial method, it will always return 0.0
707 @param channel for which we create a fake weight file
710 <?xml version="1.0" encoding="utf-8"?>
711 <method>Trivial</method>
712 <weightfile>{channel}.xml</weightfile>
713 <treename>tree</treename>
714 <target_variable>isSignal</target_variable>
715 <weight_variable>__weight__</weight_variable>
716 <signal_class>1</signal_class>
717 <max_events>0</max_events>
718 <number_feature_variables>1</number_feature_variables>
719 <variable0>M</variable0>
720 <number_spectator_variables>0</number_spectator_variables>
721 <number_data_files>1</number_data_files>
722 <datafile0>train.root</datafile0>
723 <Trivial_version>1</Trivial_version>
724 <Trivial_output>0</Trivial_output>
725 <signal_fraction>0.066082567</signal_fraction>
727 with open(f
'{channel}.xml',
"w")
as f:
733 Checks if the provided filename is a fake-weight file or not
734 @param filename the filename of the weight file
737 return '<method>Trivial</method>' in open(filename).readlines()[2]
738 except BaseException:
744 Upload the weight file into the condition database
745 @param channel whose weight file is uploaded
747 disk = f
'{channel}.xml'
748 dbase = f
'{self.config.prefix}_{channel}'
749 basf2_mva.upload(disk, dbase)
750 print(f
"FEI-core: Uploading {dbase} to localdb")
755 Do all trainings for which we find training data
761 ROOT.PyConfig.StartGuiThread =
False
764 all_stage_particles = get_stages_from_particles(self.
particles)
765 if self.
config.cache
is None:
766 stagesToTrain = range(1, len(all_stage_particles)+1)
768 stagesToTrain = [self.
config.cache]
770 filename =
'training_input.root'
771 if os.path.isfile(filename):
772 f = ROOT.TFile.Open(filename,
'read')
774 B2WARNING(f
'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
775 elif len([k.GetName()
for k
in f.GetListOfKeys()]) == 0:
777 f
'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
779 for istage
in stagesToTrain:
780 for particle
in all_stage_particles[istage-1]:
781 for channel
in particle.channels:
782 weightfile = f
'{channel.label}.xml'
783 if basf2_mva.available(weightfile):
784 B2INFO(f
"FEI-core: Skipping {weightfile}, already available")
787 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables')
788 keys = [m
for m
in f.GetListOfKeys()
if treeName
in m.GetName()]
790 B2WARNING(
"Training of MVC failed. "
791 f
"Couldn't find tree for channel {channel}. Ignoring channel.")
794 B2WARNING(f
"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
795 tree = keys[0].ReadObj()
796 total_entries = tree.GetEntries()
797 nSig = tree.GetEntries(f
'{channel.mvaConfig.target}==1.0')
798 nBg = tree.GetEntries(f
'{channel.mvaConfig.target}==0.0')
800 f
'FEI-core: Number of events for channel: {channel.label}, '
801 f
'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
802 if nSig < Teacher.MinimumNumberOfMVASamples:
803 B2WARNING(
"Training of MVC failed. "
804 f
"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
806 self.
upload(channel.label)
808 if nBg < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING(
"Training of MVC failed. "
810 f
"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
812 self.
upload(channel.label)
814 variable_str =
"' '".join(channel.mvaConfig.variables)
816 spectators = list(channel.mvaConfig.spectators.keys())
817 if channel.mvaConfig.sPlotVariable
is not None:
818 spectators.append(channel.mvaConfig.sPlotVariable)
819 spectators_str =
"' '".join(spectators)
821 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{channel.label} variables')
822 command = (f
"{self.config.externTeacher}"
823 f
" --method '{channel.mvaConfig.method}'"
824 f
" --target_variable '{channel.mvaConfig.target}'"
825 f
" --treename '{treeName}'"
826 f
" --datafile 'training_input.root'"
828 f
" --variables '{variable_str}'"
829 f
" --identifier '{weightfile}'")
830 if len(spectators) > 0:
831 command += f
" --spectators '{spectators_str}'"
832 command += f
" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
833 B2INFO(f
"Used following command to invoke teacher: \n {command}")
834 job_list.append((channel.label, command))
837 if len(job_list) > 0:
838 p = multiprocessing.Pool(
None, maxtasksperchild=1)
839 func = functools.partial(subprocess.call, shell=
True)
840 p.map(func, [c
for _, c
in job_list])
844 for name, _
in job_list:
845 if not basf2_mva.available(f
'{name}.xml'):
846 B2WARNING(
"Training of MVC failed. For unknown reasons, check the logfile", f
'{name}.log')
848 weightfiles.append(self.
upload(name))
def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Convert an old FEI training into the new format.
    The old format used hashes for the weight files, the hashes can be converted to the new naming scheme
    using the Summary.pickle file outputted by the FEIv3. This file must be passed via the parameter configuration.legacy.
    For every channel whose new-style weight file is not yet available, the legacy weight file is downloaded
    and uploaded under the new name; if no legacy weight file is available either, a fake weight file is
    created and uploaded instead.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @raise KeyError if a channel label is missing from the legacy summary
    """
    # NOTE(review): unpickling executes arbitrary code from the file -- only pass a trusted Summary.pickle.
    # The context manager makes sure the file handle is closed again.
    with open(configuration.legacy, 'rb') as summary_file:
        summary = pickle.load(summary_file)
    channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}

    teacher = Teacher(particles, configuration)

    for particle in particles:
        for channel in particle.channels:
            new_weightfile = f'{configuration.prefix}_{channel.label}'
            # The legacy summary is keyed with "J/psi", while the channel labels here use "Jpsi".
            old_weightfile = f'{configuration.prefix}_{channel2lists[channel.label.replace("Jpsi", "J/psi")]}'
            if basf2_mva.available(new_weightfile):
                continue
            if basf2_mva.available(old_weightfile):
                basf2_mva.download(old_weightfile, f'{channel.label}.xml')
            else:
                # No usable legacy training for this channel: fall back to a fake weight file.
                Teacher.create_fake_weightfile(channel.label)
            teacher.upload(channel.label)
def get_stages_from_particles(particles: typing.Sequence[typing.Union[config.Particle, str]]):
    """
    Returns the hierarchical structure of the FEI.
    Each stage depends on the particles in the previous stage.
    The final stage is empty (meaning everything is done, and the training is finished at this point).
    @param particles list of config.Particle or string objects (strings have the form 'name:label')
    @return list of stages, each stage being the list of the given particles which belong to it
    @raise RuntimeError if a particle could not be sorted into any stage
    """
    def get_pname(p):
        """Particle name: the part before the ':' for strings, the name attribute otherwise."""
        return p.split(":")[0] if isinstance(p, str) else p.name

    def get_plabel(p):
        """Lower-cased particle label: the part after the ':' for strings, the label attribute otherwise."""
        return (p.split(":")[1] if isinstance(p, str) else p.label).lower()

    stages = [
        [p for p in particles if get_pname(p) in ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0']],
        [p for p in particles if get_pname(p) in ['pi0', 'J/psi', 'Lambda0']],
        [p for p in particles if get_pname(p) in ['K_S0', 'Sigma+']],
        [p for p in particles if get_pname(p) in ['D+', 'D0', 'D_s+', 'Lambda_c+'] and 'tag' not in get_plabel(p)],
        [p for p in particles if get_pname(p) in ['D*+', 'D*0', 'D_s*+'] and 'tag' not in get_plabel(p)],
        # Everything labelled as a tag is reconstructed in the last real stage, regardless of its name.
        [p for p in particles if get_pname(p) in ['B0', 'B+', 'B_s0'] or 'tag' in get_plabel(p)],
        [],
    ]

    # Compare against the names which really ended up in a stage. The previous check
    # compared each name against copies of itself, so it could never detect an unknown particle.
    known_names = {get_pname(p) for stage in stages for p in stage}
    for p in particles:
        pname = get_pname(p)
        if pname not in known_names:
            raise RuntimeError(f"Unknown particle {pname}: Not implemented in FEI")

    return stages
def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Run the training of the mva classifiers for all available training data.
    During training this has to be called after each stage of the FEI, either by the user
    or (more likely) by the distributed.py script once the outputs of all jobs are merged.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @return list of tuple with weight file on disk and identifier in database for all trained classifiers
    """
    return Teacher(particles, configuration).do_all_trainings()
923 configuration: config.FeiConfiguration,
925 roundMode: int =
None,
926 pickleName: str =
'Summary.pickle'):
928 Creates the Summary.pickle, which is used to keep track of the stage during the training,
929 and can be used later to investigate which configuration was used exactly to create the training.
930 @param particles list of config.Particle objects
931 @param configuration config.FeiConfiguration object
932 @param cache current cache level
933 @param roundMode mode of current round of training
934 @param pickleName name of the pickle file
936 if roundMode
is None:
937 roundMode = configuration.roundMode
938 configuration = configuration._replace(cache=cache, roundMode=roundMode)
940 for i
in range(8, -1, -1):
941 if os.path.isfile(f
'{pickleName}.backup_{i}'):
942 shutil.copyfile(f
'{pickleName}.backup_{i}', f
'{pickleName}.backup_{i+1}')
943 if os.path.isfile(pickleName):
944 shutil.copyfile(pickleName, f
'{pickleName}.backup_0')
945 pickle.dump((particles, configuration), open(pickleName,
'wb'))
948def get_path(particles: typing.Sequence[
config.Particle], configuration: config.FeiConfiguration) -> FeiState:
950 The most important function of the FEI.
951 This creates the FEI path for training/fitting (both terms are equal), and application/inference (both terms are equal).
952 The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
953 and the configuration (see config.py).
956 For training this function is called multiple times; each time the FEI reconstructs one more stage in the hierarchical structure,
957 i.e. we start with FSP, then pi0, KS_0, D, D*, and finish with B mesons. You have to set configuration.training to True for training mode.
958 All weight files created during the training will be stored in your local database.
959 If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
960 to the central database first (see documentation for the Belle2 Condition Database).
963 For application you call this function once, and it returns the whole path which will reconstruct B mesons
964 with an associated signal probability. You have to set configuration.training to False for application mode.
967 You can always turn on the monitoring (configuration.monitor != False),
968 to write out ROOT Histograms of many quantities for each stage,
969 using these histograms you can use the printReporting.py or latexReporting.py scripts to automatically create pdf files.
972 This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
973 and the weight files will be automatically converted to the new naming scheme.
975 @param particles list of config.Particle objects
976 @param configuration config.FeiConfiguration object
979 ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
980 |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
981 | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|
983 Author: Thomas Keck 2014 - 2017
984 Please cite my PhD thesis
995 if configuration.training
and (configuration.monitor
and (configuration.monitoring_path !=
'')):
996 B2ERROR(
"FEI-core: Custom Monitoring path is not allowed during training!")
998 if configuration.cache
is None:
999 pickleName =
'Summary.pickle'
1000 if configuration.monitor:
1001 pickleName = os.path.join(configuration.monitoring_path, pickleName)
1003 if os.path.isfile(pickleName):
1004 particles_bkp, config_bkp = pickle.load(open(pickleName,
'rb'))
1006 for fd
in configuration._fields:
1007 if fd ==
'cache' or fd ==
'roundMode':
1009 if getattr(configuration, fd) != getattr(config_bkp, fd):
1011 f
"FEI-core: Configuration changed: {fd} from {getattr(config_bkp, fd)} to {getattr(configuration, fd)}")
1013 configuration = config_bkp
1014 cache = configuration.cache
1015 print(
"Cache: Replaced particles from steering and configuration from Summary.pickle: ", cache, configuration.roundMode)
1017 if configuration.training:
1022 cache = configuration.cache
1025 path = basf2.create_path()
1030 stages = get_stages_from_particles(particles)
1035 if configuration.legacy
is not None:
1036 convert_legacy_training(particles, configuration)
1045 if cache < 0
and configuration.training:
1046 print(
"Stage 0: Run over all files to count the number of events and McParticles")
1047 path.add_path(training_data_information.reconstruct())
1048 if configuration.training:
1049 save_summary(particles, configuration, 0)
1050 return FeiState(path, 0, [], [], [])
1051 elif not configuration.training
and configuration.monitor:
1052 path.add_path(training_data_information.reconstruct())
1058 loader =
FSPLoader(particles, configuration)
1060 print(
"Stage 0: Load FSP particles")
1061 path.add_path(loader.reconstruct())
1084 for stage, stage_particles
in enumerate(stages):
1085 if len(stage_particles) == 0:
1086 print(f
"Stage {stage}: No particles to reconstruct in this stage, skipping!")
1093 print(f
"Stage {stage}: PreReconstruct particles: ", [p.name
for p
in stage_particles])
1094 path.add_path(pre_reconstruction.reconstruct())
1095 if configuration.training
and not (post_reconstruction.available()
and configuration.roundMode == 0):
1096 print(f
"Stage {stage}: Create training data for particles: ", [p.name
for p
in stage_particles])
1097 mc_counts = training_data_information.get_mc_counts()
1098 training_data =
TrainingData(stage_particles, configuration, mc_counts)
1099 path.add_path(training_data.reconstruct())
1100 used_lists += [channel.name
for particle
in stage_particles
for channel
in particle.channels]
1103 used_lists += [particle.identifier
for particle
in stage_particles]
1104 if (stage >= cache - 1)
and not ((configuration.roundMode == 1)
and configuration.training):
1105 if (configuration.roundMode == 3)
and configuration.training:
1106 print(f
"Stage {stage}: BDTs already applied for particles, no postReco needed: ", [p.name
for p
in stage_particles])
1108 print(f
"Stage {stage}: Apply BDT for particles: ", [p.name
for p
in stage_particles])
1109 if configuration.training
and not post_reconstruction.available():
1110 raise RuntimeError(
"FEI-core: training of current stage was not successful, please retrain!")
1111 path.add_path(post_reconstruction.reconstruct())
1112 if (((configuration.roundMode == 2)
or (configuration.roundMode == 3))
and configuration.training):
1114 fsps_of_all_stages = [fsp
for sublist
in get_stages_from_particles(loader.get_fsp_lists())
for fsp
in sublist]
1117 if configuration.training
and (configuration.roundMode == 3):
1118 dontRemove = used_lists + fsps_of_all_stages
1120 cleanup = basf2.register_module(
'RemoveParticlesNotInLists')
1121 print(
"FEI-REtrain: pruning basf2_input.root of higher stages")
1122 cleanup.param(
'particleLists', dontRemove)
1123 path.add_module(cleanup)
1127 excludedParticlesNonConjugated = [p.identifier
for p
in particles
if p.identifier
not in dontRemove]
1128 excludedParticles = [
1129 str(name)
for name
in list(
1130 ROOT.Belle2.ParticleListName.addAntiParticleLists(excludedParticlesNonConjugated))]
1131 root_file = ROOT.TFile.Open(
'basf2_input.root',
"READ")
1132 tree = root_file.Get(
'tree')
1133 for branch
in tree.GetListOfBranches():
1134 branchName = branch.GetName()
1135 if any(exParticle
in branchName
for exParticle
in excludedParticles):
1136 excludelists.append(branchName)
1137 print(
"Exclude lists from output: ", excludelists)
1141 if configuration.monitor:
1142 print(
"Add ModuleStatistics")
1143 output = basf2.register_module(
'RootOutput')
1144 output.param(
'outputFileName', os.path.join(configuration.monitoring_path,
'Monitor_ModuleStatistics.root'))
1145 output.param(
'branchNames', [
'EventMetaData'])
1146 output.param(
'branchNamesPersistent', [
'ProcessStatistics'])
1147 output.param(
'ignoreCommandLineOverride',
True)
1148 path.add_module(output)
1152 if configuration.training
or configuration.monitor:
1153 print(
"Save Summary.pickle")
1154 save_summary(particles, configuration, stage+1, pickleName=os.path.join(configuration.monitoring_path,
'Summary.pickle'))
1157 return FeiState(path, stage+1, plists=used_lists, fsplists=fsps_of_all_stages, excludelists=excludelists)
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
typing.List[str] get_fsp_lists(self)
config
config.FeiConfiguration object
particles
list of config.Particle objects
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
typing.Sequence[str] get_missing_channels(self)
config
config.FeiConfiguration object
particles
list of config.Particle objects
pybasf2.Path reconstruct(self)
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
config
config.FeiConfiguration object
particles
list of config.Particle objects
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
upload(self, str channel)
create_fake_weightfile(str channel)
config
config.FeiConfiguration object
particles
list of config.Particle objects
check_if_weightfile_is_fake(str filename)
pybasf2.Path reconstruct(self)
mc_counts
containing number of MC Particles
config
config.FeiConfiguration object
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config, typing.Mapping[int, typing.Mapping[str, float]] mc_counts)
particles
list of config.Particle objects