14 The Full Event Interpretation Algorithm
17 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
18 - It can be used for hadronic and semileptonic tagging.
19 - The algorithm has to be trained on MC, and can afterwards be applied on data.
20 - The training requires O(100) million MC events
21 - The weight files are stored in the Belle 2 Condition database
23 Read this file if you want to understand the technical details of the FEI.
25 The FEI follows a hierarchical approach.
27 (Stage -1: Write out information about the provided data sample)
28 Stage 0: Final State Particles (FSP)
29 Stage 1: pi0, J/Psi, Lambda0
31 Stage 3: D and Lambda_c mesons
36 Most stages consist of:
37 - Create Particle Candidates
40 - Apply a multivariate classification method
43 The FEI will reconstruct these 7 stages during the training phase,
44 since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
45 to train all the necessary multivariate classifiers.
50 from ROOT
import PyConfig
51 PyConfig.IgnoreCommandLineOptions =
True
55 PyConfig.StartGuiThread =
False
57 from ROOT
import Belle2
61 from basf2
import B2INFO, B2WARNING
63 import modularAnalysis
as ma
71 from fei
import config
82 import multiprocessing
85 FeiState = collections.namedtuple(
'FeiState',
'path, stage, plists')
88 class TrainingDataInformation:
90 Contains the relevant information about the used training data.
91 Basically we write out the number of MC particles in the whole dataset.
92 These numbers we can use to calculate what fraction of candidates we have to write
93 out as TrainingData to get a reasonable amount of candidates to train on
94 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
95 Secondly we can use this information for the generation of the monitoring pdfs,
96 where we calculate reconstruction efficiencies.
def __init__(self, particles: typing.Sequence[config.Particle]):
    """
    Create a new TrainingDataInformation object
    @param particles list of config.Particle objects
    """
    #: list of config.Particle objects
    self.particles = particles
    #: ROOT file into which reconstruct() writes the MC particle counts
    self.filename = 'mcParticlesCount.root'
def available(self) -> bool:
    """
    Check if the relevant information is already available
    """
    # The counts are available exactly when the ROOT file has been written out.
    return os.path.isfile(self.filename)
def reconstruct(self) -> pybasf2.Path:
    """
    Returns pybasf2.Path which counts the number of MCParticles in each event.
    """
    # Unique absolute PDG codes of all particles we are interested in
    pdgs = {abs(pdg.from_name(particle.name)) for particle in self.particles}

    path = basf2.create_path()
    module = basf2.register_module('VariablesToHistogram')
    module.set_name("VariablesToHistogram_MCCount")
    # One histogram per PDG code counting how often that particle occurs per event
    module.param('variables', [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs])
    module.param('fileName', self.filename)
    path.add_module(module)
    # The function is annotated -> pybasf2.Path; the built path must be returned.
    return path
def get_mc_counts(self):
    """
    Read out the number of MC particles from the file created by reconstruct
    @return dict mapping abs(pdg) -> {'sum','std','avg','max','min'}; key 0 holds the total event count
    """
    root_file = ROOT.TFile.Open(self.filename, 'read')
    mc_counts = {}

    for key in root_file.GetListOfKeys():
        # Histogram names are ROOT-compatible encodings of
        # NumberOfMCParticlesInEvent(<pdg>); invert the encoding to recover the pdg.
        variable = Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
        pdg = abs(int(variable[len('NumberOfMCParticlesInEvent('):-len(")")]))
        hist = key.ReadObj()
        mc_counts[pdg] = {}
        # Weighted sum over bins = total number of this MC particle in the dataset
        mc_counts[pdg]['sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
                                    for bin in range(hist.GetNbinsX()))
        mc_counts[pdg]['std'] = hist.GetStdDev()
        mc_counts[pdg]['avg'] = hist.GetMean()
        mc_counts[pdg]['max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
        mc_counts[pdg]['min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))

    # Every histogram has one entry per event, so GetEntries() of the last one
    # is the total number of events in the dataset.
    mc_counts[0] = {}
    mc_counts[0]['sum'] = hist.GetEntries()
    # Close the file explicitly instead of leaking the handle.
    root_file.Close()
    return mc_counts
161 Steers the loading of FSP particles.
162 This does NOT include RootInput, Geometry or anything required before loading FSPs,
163 the user has to add this himself (because it depends on the MC campaign and if you want
164 to use Belle 1 or Belle 2).
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Create a new FSPLoader object
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    #: list of config.Particle objects
    self.particles = particles
    #: config.FeiConfiguration object; reconstruct() reads self.config.monitor
    self.config = config
178 def reconstruct(self) -> pybasf2.Path:
180 Returns pybasf2.Path which loads the FSP Particles
182 path = basf2.create_path()
185 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
186 (
'mu+:FSP',
''), (
'p+:FSP',
'')], writeOut=
True, path=path)
187 for outputList, inputList
in [(
'gamma:FSP',
'gamma:mdst'), (
'K_S0:V0',
'K_S0:mdst'),
188 (
'Lambda0:V0',
'Lambda0:mdst'), (
'K_L0:FSP',
'K_L0:mdst'),
189 (
'pi0:FSP',
'pi0:mdst'), (
'gamma:V0',
'gamma:v0mdst')]:
190 ma.copyParticles(outputList, inputList, writeOut=
True, path=path)
192 ma.fillParticleLists([(
'K+:FSP',
''), (
'pi+:FSP',
''), (
'e+:FSP',
''),
193 (
'mu+:FSP',
''), (
'gamma:FSP',
''),
194 (
'p+:FSP',
''), (
'K_L0:FSP',
'')], writeOut=
True, loadPhotonBeamBackgroundMVA=
False, path=path)
195 ma.fillParticleList(
'K_S0:V0 -> pi+ pi-',
'', writeOut=
True, path=path)
196 ma.fillParticleList(
'Lambda0:V0 -> p+ pi-',
'', writeOut=
True, path=path)
197 ma.fillConvertedPhotonsList(
'gamma:V0 -> e+ e-',
'', writeOut=
True, path=path)
199 if self.config.monitor:
200 names = [
'e+',
'K+',
'pi+',
'mu+',
'gamma',
'K_S0',
'p+',
'K_L0',
'Lambda0',
'pi0']
201 filename =
'Monitor_FSPLoader.root'
203 variables = [(f
'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5)
for pdg
in pdgs]
204 ma.variablesToHistogram(
'', variables=variables, filename=filename, path=path)
210 Steers the creation of the training data.
211 The training data is used to train a multivariate classifier for each channel.
212 The training of the FEI at its core is just generating this training data for each channel.
213 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration,
             mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
    """
    Create a new TrainingData object
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    @param mc_counts containing number of MC Particles
    """
    #: list of config.Particle objects
    self.particles = particles
    #: config.FeiConfiguration object; reconstruct() reads self.config.monitor
    self.config = config
    #: containing number of MC Particles
    self.mc_counts = mc_counts
231 def reconstruct(self) -> pybasf2.Path:
233 Returns pybasf2.Path which creates the training data for the given particles
235 path = basf2.create_path()
237 for particle
in self.particles:
239 nSignal = self.mc_counts[pdgcode][
'sum']
247 for channel
in particle.channels:
248 filename =
'training_input.root'
250 nBackground = self.mc_counts[0][
'sum'] * channel.preCutConfig.bestCandidateCut
251 inverseSamplingRates = {}
254 if nBackground > Teacher.MaximumNumberOfMVASamples
and not channel.preCutConfig.noBackgroundSampling:
255 inverseSamplingRates[0] = int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1
256 if nSignal > Teacher.MaximumNumberOfMVASamples:
257 inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1
259 spectators = [channel.mvaConfig.target]
260 if channel.mvaConfig.sPlotVariable
is not None:
261 spectators.append(channel.mvaConfig.sPlotVariable)
263 if self.config.monitor:
264 hist_variables = [
'mcErrors',
'mcParticleStatus'] + channel.mvaConfig.variables + spectators
265 hist_variables_2d = [(x, channel.mvaConfig.target)
266 for x
in channel.mvaConfig.variables + spectators
if x
is not channel.mvaConfig.target]
267 hist_filename = f
'Monitor_TrainingData.root'
268 ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
269 variables_2d=config.variables2binnings_2d(hist_variables_2d),
270 filename=config.removeJPsiSlash(hist_filename),
271 directory=config.removeJPsiSlash(f
'{channel.label}'), path=path)
273 teacher = basf2.register_module(
'VariablesToNtuple')
274 teacher.set_name(
'VariablesToNtuple_' + channel.name)
275 teacher.param(
'fileName', filename)
276 teacher.param(
'treeName', f
'{channel.label} variables')
277 teacher.param(
'variables', channel.mvaConfig.variables + spectators)
278 teacher.param(
'particleList', channel.name)
279 teacher.param(
'sampling', (channel.mvaConfig.target, inverseSamplingRates))
280 path.add_module(teacher)
284 class PreReconstruction:
286 Steers the reconstruction phase before the mva method was applied
288 - The ParticleCombination (for each particle and channel we create candidates using
289 the daughter candidates from the previous stages)
291 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
292 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
293 this will speed up the whole FEI by a factor 2-3)
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Create a new PreReconstruction object
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    #: list of config.Particle objects
    self.particles = particles
    #: config.FeiConfiguration object; reconstruct() reads self.config.monitor/training
    self.config = config
307 def reconstruct(self) -> pybasf2.Path:
309 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
311 path = basf2.create_path()
313 for particle
in self.particles:
314 for channel
in particle.channels:
316 if len(channel.daughters) == 1:
317 ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=
True, path=path)
318 v2EI = basf2.register_module(
'VariablesToExtraInfo')
319 v2EI.set_name(
'VariablesToExtraInfo_' + channel.name)
320 v2EI.param(
'particleList', channel.name)
321 v2EI.param(
'variables', {f
'constant({channel.decayModeID})':
'decayModeID'})
323 v2EI.set_log_level(basf2.logging.log_level.ERROR)
324 path.add_module(v2EI)
326 ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
327 writeOut=
True, path=path)
328 if self.config.monitor:
329 ma.matchMCTruth(channel.name, path=path)
330 bc_variable = channel.preCutConfig.bestCandidateVariable
331 hist_variables = [bc_variable,
'mcErrors',
'mcParticleStatus', channel.mvaConfig.target]
332 hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
333 (bc_variable,
'mcErrors'),
334 (bc_variable,
'mcParticleStatus')]
335 filename = f
'Monitor_PreReconstruction_BeforeRanking.root'
336 ma.variablesToHistogram(channel.name,
337 variables=config.variables2binnings(hist_variables),
338 variables_2d=config.variables2binnings_2d(hist_variables_2d),
339 filename=filename, directory=f
'{channel.label}', path=path)
341 if channel.preCutConfig.bestCandidateMode ==
'lowest':
342 ma.rankByLowest(channel.name,
343 channel.preCutConfig.bestCandidateVariable,
344 channel.preCutConfig.bestCandidateCut,
347 elif channel.preCutConfig.bestCandidateMode ==
'highest':
348 ma.rankByHighest(channel.name,
349 channel.preCutConfig.bestCandidateVariable,
350 channel.preCutConfig.bestCandidateCut,
354 raise RuntimeError(
"Unknown bestCandidateMode " + repr(channel.preCutConfig.bestCandidateMode))
356 if self.config.monitor:
357 filename = f
'Monitor_PreReconstruction_AfterRanking.root'
358 hist_variables += [
'extraInfo(preCut_rank)']
359 hist_variables_2d += [(
'extraInfo(preCut_rank)', channel.mvaConfig.target),
360 (
'extraInfo(preCut_rank)',
'mcErrors'),
361 (
'extraInfo(preCut_rank)',
'mcParticleStatus')]
362 ma.variablesToHistogram(channel.name,
363 variables=config.variables2binnings(hist_variables),
364 variables_2d=config.variables2binnings_2d(hist_variables_2d),
365 filename=filename, directory=f
'{channel.label}', path=path)
368 elif self.config.training:
369 ma.matchMCTruth(channel.name, path=path)
372 pvfit = basf2.register_module(
'ParticleVertexFitter')
373 pvfit.set_name(
'ParticleVertexFitter_' + channel.name)
374 pvfit.param(
'listName', channel.name)
375 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
376 pvfit.param(
'vertexFitter',
'KFit')
377 pvfit.param(
'fitType',
'vertex')
378 pvfit.set_log_level(basf2.logging.log_level.ERROR)
379 path.add_module(pvfit)
380 elif re.findall(
r"[\w']+", channel.decayString).count(
'pi0') > 1
and particle.name !=
'pi0':
381 basf2.B2INFO(f
"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
382 elif len(channel.daughters) > 1:
383 pvfit = basf2.register_module(
'ParticleVertexFitter')
384 pvfit.set_name(
'ParticleVertexFitter_' + channel.name)
385 pvfit.param(
'listName', channel.name)
386 pvfit.param(
'confidenceLevel', channel.preCutConfig.vertexCut)
387 pvfit.param(
'vertexFitter',
'KFit')
388 pvfit.param(
'fitType',
'vertex')
389 pvfit.set_log_level(basf2.logging.log_level.ERROR)
390 path.add_module(pvfit)
392 if self.config.monitor:
393 hist_variables = [
'chiProb',
'mcErrors',
'mcParticleStatus', channel.mvaConfig.target]
394 hist_variables_2d = [(
'chiProb', channel.mvaConfig.target),
395 (
'chiProb',
'mcErrors'),
396 (
'chiProb',
'mcParticleStatus')]
397 filename = f
'Monitor_PreReconstruction_AfterVertex.root'
398 ma.variablesToHistogram(channel.name,
399 variables=config.variables2binnings(hist_variables),
400 variables_2d=config.variables2binnings_2d(hist_variables_2d),
401 filename=filename, directory=f
'{channel.label}', path=path)
406 class PostReconstruction:
408 Steers the reconstruction phase after the mva method was applied
410 - The application of the mva method itself.
411 - Copying all channel lists in a common one for each particle defined in particles
412 - Tag unique signal candidates, to avoid double counting of channels with overlap
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Create a new PostReconstruction object
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    #: list of config.Particle objects
    self.particles = particles
    #: config.FeiConfiguration object; reconstruct() reads self.config.training/monitor
    self.config = config
def get_missing_channels(self) -> typing.Sequence[str]:
    """
    Returns all channels for which the weightfile is missing
    """
    missing = []
    for particle in self.particles:
        for channel in particle.channels:
            # Weight files are stored on disk as <label>.xml
            weightfile = channel.label + '.xml'
            if not basf2_mva.available(weightfile):
                missing += [channel.label]
    # The annotation promises a sequence of labels; return the accumulated list.
    return missing
def available(self) -> bool:
    """
    Check if the relevant information is already available
    """
    # Everything is available exactly when no channel is missing its weight file.
    return not self.get_missing_channels()
445 def reconstruct(self) -> pybasf2.Path:
447 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
449 path = basf2.create_path()
451 for particle
in self.particles:
452 for channel
in particle.channels:
453 expert = basf2.register_module(
'MVAExpert')
454 expert.set_name(
'MVAExpert_' + channel.name)
455 if self.config.training:
456 expert.param(
'identifier', channel.label +
'.xml')
458 expert.param(
'identifier', self.config.prefix +
'_' + channel.label)
459 expert.param(
'extraInfoName',
'SignalProbability')
460 expert.param(
'listNames', [channel.name])
462 expert.set_log_level(basf2.logging.log_level.ERROR)
463 path.add_module(expert)
465 uniqueSignal = basf2.register_module(
'TagUniqueSignal')
466 uniqueSignal.param(
'particleList', channel.name)
467 uniqueSignal.param(
'target', channel.mvaConfig.target)
468 uniqueSignal.param(
'extraInfoName',
'uniqueSignal')
469 uniqueSignal.set_name(
'TagUniqueSignal_' + channel.name)
471 uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
472 path.add_module(uniqueSignal)
474 if self.config.monitor:
475 hist_variables = [
'mcErrors',
'mcParticleStatus',
'extraInfo(uniqueSignal)',
'extraInfo(SignalProbability)',
476 channel.mvaConfig.target,
'extraInfo(decayModeID)']
477 hist_variables_2d = [(
'extraInfo(SignalProbability)', channel.mvaConfig.target),
478 (
'extraInfo(SignalProbability)',
'mcErrors'),
479 (
'extraInfo(SignalProbability)',
'mcParticleStatus'),
480 (
'extraInfo(decayModeID)', channel.mvaConfig.target),
481 (
'extraInfo(decayModeID)',
'mcErrors'),
482 (
'extraInfo(decayModeID)',
'extraInfo(uniqueSignal)'),
483 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
484 filename = f
'Monitor_PostReconstruction_AfterMVA.root'
485 ma.variablesToHistogram(channel.name,
486 variables=config.variables2binnings(hist_variables),
487 variables_2d=config.variables2binnings_2d(hist_variables_2d),
488 filename=filename, directory=f
'{channel.label}', path=path)
491 if particle.postCutConfig.value > 0.0:
492 cutstring = str(particle.postCutConfig.value) +
' < extraInfo(SignalProbability)'
494 ma.mergeListsWithBestDuplicate(particle.identifier, [c.name
for c
in particle.channels],
495 variable=
'particleSource', writeOut=
True, path=path)
497 if self.config.monitor:
498 hist_variables = [
'mcErrors',
'mcParticleStatus',
'extraInfo(uniqueSignal)',
'extraInfo(SignalProbability)',
499 particle.mvaConfig.target,
'extraInfo(decayModeID)']
500 hist_variables_2d = [(
'extraInfo(decayModeID)', particle.mvaConfig.target),
501 (
'extraInfo(decayModeID)',
'mcErrors'),
502 (
'extraInfo(decayModeID)',
'mcParticleStatus')]
503 filename = f
'Monitor_PostReconstruction_BeforePostCut.root'
504 ma.variablesToHistogram(
506 variables=config.variables2binnings(hist_variables),
507 variables_2d=config.variables2binnings_2d(hist_variables_2d),
508 filename=config.removeJPsiSlash(filename),
509 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
512 ma.applyCuts(particle.identifier, cutstring, path=path)
514 if self.config.monitor:
515 filename = f
'Monitor_PostReconstruction_BeforeRanking.root'
516 ma.variablesToHistogram(
518 variables=config.variables2binnings(hist_variables),
519 variables_2d=config.variables2binnings_2d(hist_variables_2d),
520 filename=config.removeJPsiSlash(filename),
521 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
524 ma.rankByHighest(particle.identifier,
'extraInfo(SignalProbability)',
525 particle.postCutConfig.bestCandidateCut,
'postCut_rank', path=path)
527 if self.config.monitor:
528 hist_variables += [
'extraInfo(postCut_rank)']
529 hist_variables_2d += [(
'extraInfo(decayModeID)',
'extraInfo(postCut_rank)'),
530 (particle.mvaConfig.target,
'extraInfo(postCut_rank)'),
531 (
'mcErrors',
'extraInfo(postCut_rank)'),
532 (
'mcParticleStatus',
'extraInfo(postCut_rank)')]
533 filename = f
'Monitor_PostReconstruction_AfterRanking.root'
534 ma.variablesToHistogram(
536 variables=config.variables2binnings(hist_variables),
537 variables_2d=config.variables2binnings_2d(hist_variables_2d),
538 filename=config.removeJPsiSlash(filename),
539 directory=config.removeJPsiSlash(f
'{particle.identifier}'),
542 variables = [
'extraInfo(SignalProbability)',
'mcErrors',
'mcParticleStatus', particle.mvaConfig.target,
543 'extraInfo(uniqueSignal)',
'extraInfo(decayModeID)']
545 if 'B_s0' == particle.name:
547 elif 'B' in particle.name:
548 variables += [
'Mbc',
'cosThetaBetweenParticleAndNominalB']
550 filename = f
'Monitor_Final.root'
551 ma.variablesToNtuple(particle.identifier, variables, treename=config.removeJPsiSlash(
552 f
'{particle.identifier} variables'), filename=config.removeJPsiSlash(filename), path=path)
558 Performs all necessary trainings for all training data files which are
559 available but where there is no weight file available yet.
560 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
561 The trainings are run in parallel using multi-threading of python.
562 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
563 * basf2_mva_teacher, the training will be done directly on the machine
564 * externClustTeacher, the training will be submitted to the batch system of KEKCC
#: Maximum number of samples used to train a single MVA classifier
MaximumNumberOfMVASamples = int(1e7)

#: Minimum number of samples required to train a single MVA classifier
MinimumNumberOfMVASamples = int(5e2)

def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Create a new Teacher object
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    #: list of config.Particle objects
    self.particles = particles
    #: config.FeiConfiguration object; upload() reads self.config.prefix
    self.config = config
def create_fake_weightfile(channel: str):
    """
    Create a fake weight file using the trivial method, it will always return 0.0
    @param channel for which we create a fake weight file
    """
    content = f"""
        <?xml version="1.0" encoding="utf-8"?>
        <method>Trivial</method>
        <weightfile>{channel}.xml</weightfile>
        <treename>tree</treename>
        <target_variable>isSignal</target_variable>
        <weight_variable>__weight__</weight_variable>
        <signal_class>1</signal_class>
        <max_events>0</max_events>
        <number_feature_variables>1</number_feature_variables>
        <variable0>M</variable0>
        <number_spectator_variables>0</number_spectator_variables>
        <number_data_files>1</number_data_files>
        <datafile0>train.root</datafile0>
        <Trivial_version>1</Trivial_version>
        <Trivial_output>0</Trivial_output>
        <signal_fraction>0.066082567</signal_fraction>
        """
    # Use a context manager so the handle is closed even if the write fails.
    with open(channel + ".xml", "w") as f:
        f.write(content)
def check_if_weightfile_is_fake(filename: str):
    """
    Checks if the provided filename is a fake-weight file or not
    @param filename the filename of the weight file
    @return True if the file is fake (or unreadable), False otherwise
    """
    try:
        # Fake weight files carry the Trivial method tag on their third line.
        # Use a context manager so the handle is not leaked.
        with open(filename) as f:
            return '<method>Trivial</method>' in f.readlines()[2]
    except BaseException:
        # An unreadable/too-short file is treated as fake.
        return True
    return False
def upload(self, channel: str):
    """
    Upload the weight file into the condition database
    @param channel whose weight file is uploaded
    @return tuple (filename on disk, identifier in the database)
    """
    disk = channel + '.xml'
    dbase = self.config.prefix + '_' + channel
    basf2_mva.upload(disk, dbase)
    # Return both identifiers so callers can record where the weight file ended up.
    return (disk, dbase)
633 def do_all_trainings(self):
635 Do all trainings for which we find training data
638 filename =
'training_input.root'
639 if not os.path.isfile(filename):
640 B2WARNING(
"Training of MVC failed. Couldn't find ROOT file. "
641 "No weight files will be provided.")
643 f = ROOT.TFile.Open(filename,
'read')
645 B2WARNING(
"Training of MVC failed. ROOT file corrupt. "
646 "No weight files will be provided.")
647 elif len([k.GetName()
for k
in f.GetListOfKeys()]) == 0:
648 B2WARNING(
"Training of MVC failed. ROOT file does not contain any trees. "
649 "No weight files will be provided.")
651 for particle
in self.particles:
652 for channel
in particle.channels:
653 weightfile = channel.label +
'.xml'
654 if not basf2_mva.available(weightfile):
655 keys = [m
for m
in f.GetListOfKeys()
if f
"{channel.label}" in m.GetName()]
658 tree = keys[0].ReadObj()
659 nSig = tree.GetEntries(channel.mvaConfig.target +
' == 1.0')
660 nBg = tree.GetEntries(channel.mvaConfig.target +
' != 1.0')
661 if nSig < Teacher.MinimumNumberOfMVASamples:
662 B2WARNING(
"Training of MVC failed. "
663 f
"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
664 self.create_fake_weightfile(channel.label)
665 self.upload(channel.label)
667 if nBg < Teacher.MinimumNumberOfMVASamples:
668 B2WARNING(
"Training of MVC failed. "
669 f
"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
670 self.create_fake_weightfile(channel.label)
671 self.upload(channel.label)
673 variable_str =
"' '".join(channel.mvaConfig.variables)
675 command = (f
"{self.config.externTeacher}"
676 f
" --method '{channel.mvaConfig.method}'"
677 f
" --target_variable '{channel.mvaConfig.target}'"
678 f
" --treename '{channel.label} variables' --datafile 'training_input.root'"
679 f
" --signal_class 1 --variables '{variable_str}'"
680 f
" --identifier '{channel.label}.xml'"
681 f
" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1")
682 B2INFO(f
"Used following command to invoke teacher: \n {command}")
683 job_list.append((channel.label, command))
685 p = multiprocessing.Pool(
None, maxtasksperchild=1)
686 func = functools.partial(subprocess.call, shell=
True)
687 p.map(func, [c
for _, c
in job_list])
691 for name, _
in job_list:
692 if not basf2_mva.available(name +
'.xml'):
693 B2WARNING(
"Training of MVC failed. For unknown reasons, check the logfile")
694 self.create_fake_weightfile(name)
695 weightfiles.append(self.upload(name))
def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Convert an old FEI training into the new format.
    The old format used hashes for the weight files, the hashes can be converted to the new naming scheme
    using the Summary.pickle file outputted by the FEIv3. This file must be passed via configuration.legacy.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    """
    # Use a context manager instead of a bare open() so the pickle file is closed.
    with open(configuration.legacy, 'rb') as summary_file:
        summary = pickle.load(summary_file)
    channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}

    teacher = Teacher(particles, configuration)

    for particle in particles:
        for channel in particle.channels:
            new_weightfile = configuration.prefix + '_' + channel.label
            # The old naming scheme used 'J/psi' where the new labels use 'Jpsi'.
            old_weightfile = configuration.prefix + '_' + channel2lists[channel.label.replace('Jpsi', 'J/psi')]
            if not basf2_mva.available(new_weightfile):
                if old_weightfile is None or not basf2_mva.available(old_weightfile):
                    # No usable legacy training: fall back to a fake weight file.
                    Teacher.create_fake_weightfile(channel.label)
                    teacher.upload(channel.label)
                else:
                    basf2_mva.download(old_weightfile, channel.label + '.xml')
                    teacher.upload(channel.label)
def get_stages_from_particles(particles: typing.Sequence[config.Particle]):
    """
    Returns the hierarchical structure of the FEI.
    Each stage depends on the particles in the previous stage.
    The final stage is empty (meaning everything is done, and the training is finished at this point).
    @param particles list of config.Particle objects
    """
    stage_names = [
        ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0'],
        ['pi0', 'J/psi', 'Lambda0'],
        ['K_S0', 'Sigma+'],
        ['D+', 'D0', 'D_s+', 'Lambda_c+'],
        ['D*+', 'D*0', 'D_s*+'],
        ['B0', 'B+', 'B_s0'],
    ]
    stages = [[p for p in particles if p.name in names] for names in stage_names]
    # The last stage is intentionally empty: it marks that the training is finished.
    stages.append([])

    # Every provided particle must belong to exactly one of the stages above.
    known_names = {p.name for stage in stages for p in stage}
    for p in particles:
        if p.name not in known_names:
            raise RuntimeError(f"Unknown particle {p.name}: Not implemented in FEI")

    return stages
def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Performs the training of mva classifiers for all available training data,
    this function must be either called by the user after each stage of the FEI during training,
    or (more likely) is called by the distributed.py script after merging the outputs of all jobs,
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @return list of tuple with weight file on disk and identifier in database for all trained classifiers
    """
    # Delegate everything to a freshly constructed Teacher.
    return Teacher(particles, configuration).do_all_trainings()
def save_summary(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration, cache: int):
    """
    Creates the Summary.pickle, which is used to keep track of the stage during the training,
    and can be used later to investigate which configuration was used exactly to create the training.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @param cache current cache level
    """
    # Rebuild the configuration with the updated cache level; all other fields are kept.
    configuration = config.FeiConfiguration(configuration.prefix, cache,
                                            configuration.monitor, configuration.legacy, configuration.externTeacher,
                                            configuration.training)

    # Rotate the existing backups, highest index first so nothing is overwritten.
    for i in range(8, -1, -1):
        if os.path.isfile(f'Summary.pickle.backup_{i}'):
            shutil.copyfile(f'Summary.pickle.backup_{i}', f'Summary.pickle.backup_{i + 1}')
    if os.path.isfile('Summary.pickle'):
        shutil.copyfile('Summary.pickle', 'Summary.pickle.backup_0')

    # Use a context manager so the pickle file handle is closed instead of leaked.
    with open('Summary.pickle', 'wb') as summary_file:
        pickle.dump((particles, configuration), summary_file)
782 def get_path(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration) -> FeiState:
784 The most important function of the FEI.
785 This creates the FEI path for training/fitting (both terms are equal), and application/inference (both terms are equal).
786 The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
787 and the configuration (see config.py).
790 For training this function is called multiple times, each time the FEI reconstructs one more stage in the hierarchical structure
791 i.e. we start with FSP, pi0, KS_0, D, D*, and with B mesons. You have to set configuration.training to True for training mode.
792 All weight files created during the training will be stored in your local database.
793 If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
794 to the central database first (see documentation for the Belle2 Condition Database).
797 For application you call this function once, and it returns the whole path which will reconstruct B mesons
798 with an associated signal probability. You have to set configuration.training to False for application mode.
801 You can always turn on the monitoring (configuration.monitor = True),
802 to write out ROOT Histograms of many quantities for each stage,
803 using these histograms you can use the printReporting.py or latexReporting.py scripts to automatically create pdf files.
806 This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
807 and the weight files will be automatically converted to the new naming scheme.
809 @param particles list of config.Particle objects
810 @param config config.FeiConfiguration object
813 ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
814 |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
815 | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|
817 Author: Thomas Keck 2014 - 2017
818 Please cite my PhD thesis
827 if configuration.cache
is None:
828 if os.path.isfile(
'Summary.pickle'):
829 print(
"Cache: Replaced particles and configuration with the ones from Summary.pickle!")
830 particles, configuration = pickle.load(open(
'Summary.pickle',
'rb'))
831 cache = configuration.cache
833 if configuration.training:
838 cache = configuration.cache
841 path = basf2.create_path()
846 stages = get_stages_from_particles(particles)
851 if configuration.legacy
is not None:
852 convert_legacy_training(particles, configuration)
860 training_data_information = TrainingDataInformation(particles)
861 if cache < 0
and configuration.training:
862 print(
"Stage 0: Run over all files to count the number of events and McParticles")
863 path.add_path(training_data_information.reconstruct())
864 if configuration.training:
865 save_summary(particles, configuration, 0)
866 return FeiState(path, 0, [])
867 elif not configuration.training
and configuration.monitor:
868 path.add_path(training_data_information.reconstruct())
874 loader = FSPLoader(particles, configuration)
876 print(
"Stage 0: Load FSP particles")
877 path.add_path(loader.reconstruct())
900 for stage, stage_particles
in enumerate(stages):
901 pre_reconstruction = PreReconstruction(stage_particles, configuration)
903 print(f
"Stage {stage}: PreReconstruct particles: ", [p.name
for p
in stage_particles])
904 path.add_path(pre_reconstruction.reconstruct())
906 post_reconstruction = PostReconstruction(stage_particles, configuration)
907 if configuration.training
and not post_reconstruction.available():
908 print(f
"Stage {stage}: Create training data for particles: ", [p.name
for p
in stage_particles])
909 mc_counts = training_data_information.get_mc_counts()
910 training_data = TrainingData(stage_particles, configuration, mc_counts)
911 path.add_path(training_data.reconstruct())
912 used_lists += [channel.name
for particle
in stage_particles
for channel
in particle.channels]
914 if cache <= stage + 1:
915 path.add_path(post_reconstruction.reconstruct())
916 used_lists += [particle.identifier
for particle
in stage_particles]
920 if configuration.monitor:
921 output = basf2.register_module(
'RootOutput')
922 output.param(
'outputFileName',
'Monitor_ModuleStatistics.root')
923 output.param(
'branchNames', [
'EventMetaData'])
924 output.param(
'branchNamesPersistent', [
'ProcessStatistics'])
925 output.param(
'ignoreCommandLineOverride',
True)
926 path.add_module(output)
930 if configuration.training
or configuration.monitor:
931 save_summary(particles, configuration, stage + 1)
934 return FeiState(path, stage + 1, plists=used_lists)
Global list of available variables.
std::string invertMakeROOTCompatible(std::string str)
Invert makeROOTCompatible operation.