The Full Event Interpretation Algorithm

 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
 - It can be used for hadronic and semileptonic tagging.
 - The algorithm has to be trained on MC and can afterwards be applied to data.
 - The training requires O(100) million MC events.
 - The weight files are stored in the Belle II Conditions Database.

Read this file if you want to understand the technical details of the FEI.

The FEI follows a hierarchical approach.

  (Stage -1: Write out information about the provided data sample)
  Stage 0: Final State Particles (FSP)
  Stage 1: pi0, J/Psi, Lambda0
  Stage 2: K_S0, Sigma+
  Stage 3: D and Lambda_c mesons
  Stage 4: D* mesons
  Stage 5: B mesons
Most stages consist of:

  - Create particle candidates
  - Apply cuts
  - Do vertex fitting
  - Apply a multivariate classification method
  - Apply more cuts

The FEI will reconstruct these 7 stages during the training phase.
Since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
to train all the necessary multivariate classifiers.
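
A minimal sketch of this iterative training procedure (hedged: the prefix is a placeholder, and
fei.get_default_channels / fei.config.FeiConfiguration with keyword arguments are assumptions
about the surrounding fei package):

    import fei
    particles = fei.get_default_channels()
    configuration = fei.config.FeiConfiguration(prefix='FEI_TEST', training=True)
    feistate = fei.get_path(particles, configuration)
    # add feistate.path to your basf2 path and process the data, then train this stage:
    fei.do_trainings(particles, configuration)
    # re-run the same script until all stages are trained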
 
import basf2
from basf2 import B2INFO, B2WARNING
import pybasf2
import modularAnalysis as ma
import b2bii
# Should come after basf2 import
import pdg, basf2_mva, ROOT
# Standard python modules
import collections, os, shutil, typing, pickle, re, functools, subprocess, multiprocessing
from fei import config

# Simple object containing the output of fei
FeiState = collections.namedtuple('FeiState', 'path, stage, plists')
class TrainingDataInformation:
    Contains the relevant information about the used training data.
    Basically we write out the number of MC particles in the whole dataset.
    These numbers can be used to calculate what fraction of candidates we have to write
    out as TrainingData to get a reasonable amount of candidates to train on
    (too few candidates will lead to heavy overtraining, too many won't fit into memory).
    Secondly, we can use this information for the generation of the monitoring pdfs,
    where we calculate reconstruction efficiencies.
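
    A worked example with made-up numbers (hedged; the cap of 1e7 corresponds to
    Teacher.MaximumNumberOfMVASamples defined further down in this file):

        n_events = 10**8                    # mc_counts[0]['sum'], total number of MC events
        best_candidate_cut = 20             # candidates kept per event for one channel
        n_background = n_events * best_candidate_cut
        inverse_sampling_rate = n_background // int(1e7) + 1   # 201: write out every 201st candidate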
 
    def __init__(self, particles: typing.Sequence[config.Particle]):
        Create a new TrainingDataInformation object
        @param particles list of config.Particle objects
        self.particles = particles
        self.filename = 'mcParticlesCount.root'

    def available(self) -> bool:
        Check if the relevant information is already available
        return os.path.isfile(self.filename)
 
    def reconstruct(self) -> pybasf2.Path:
        Returns pybasf2.Path which counts the number of MCParticles in each event.
        pdgs = {abs(pdg.from_name(particle.name)) for particle in self.particles}

        path = basf2.create_path()
        module = basf2.register_module('VariablesToHistogram')
        module.set_name("VariablesToHistogram_MCCount")
        module.param('variables', [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs])
        module.param('fileName', self.filename)
        module.param('ignoreCommandLineOverride', True)
        path.add_module(module)
        return path
 
    def get_mc_counts(self):
        Read out the number of MC particles from the file created by reconstruct
        root_file = ROOT.TFile.Open(self.filename, 'read')
        mc_counts = {}

        for key in root_file.GetListOfKeys():
            variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
            pdg = abs(int(variable[len('NumberOfMCParticlesInEvent('):-len(")")]))
            hist = key.ReadObj()
            mc_counts[pdg] = {}
            mc_counts[pdg]['sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
                                        for bin in range(hist.GetNbinsX()))
            mc_counts[pdg]['std'] = hist.GetStdDev()
            mc_counts[pdg]['avg'] = hist.GetMean()
            mc_counts[pdg]['max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
            mc_counts[pdg]['min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))

        # total number of events (all histograms have the same number of entries)
        mc_counts[0] = {}
        mc_counts[0]['sum'] = hist.GetEntries()
        return mc_counts
 
class FSPLoader:
    Steers the loading of FSP particles.
    This does NOT include RootInput, Geometry or anything else required before loading FSPs;
    the user has to add this themselves (because it depends on the MC campaign and on whether
    you want to use Belle or Belle II).
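
    A minimal sketch of the user-side setup (hedged; the input file name is a placeholder,
    'particles' and 'configuration' are the objects passed to the FEI):

        import basf2
        path = basf2.create_path()
        root_input = basf2.register_module('RootInput')
        root_input.param('inputFileNames', ['my_mdst_file.root'])  # placeholder file name
        path.add_module(root_input)
        path.add_path(FSPLoader(particles, configuration).reconstruct())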
 
    def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
        Create a new FSPLoader object
        @param particles list of config.Particle objects
        @param config config.FeiConfiguration object
        self.particles = particles
        self.config = config
    def reconstruct(self) -> pybasf2.Path:
        Returns pybasf2.Path which loads the FSP Particles
        path = basf2.create_path()

        if b2bii.isB2BII():
            ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
                                  ('mu+:FSP', ''), ('p+:FSP', '')], writeOut=True, path=path)
            for outputList, inputList in [('gamma:FSP', 'gamma:mdst'), ('K_S0:V0', 'K_S0:mdst'),
                                          ('Lambda0:V0', 'Lambda0:mdst'), ('K_L0:FSP', 'K_L0:mdst'),
                                          ('pi0:FSP', 'pi0:mdst'), ('gamma:V0', 'gamma:v0mdst')]:
                ma.copyParticles(outputList, inputList, writeOut=True, path=path)
        else:
            ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
                                  ('mu+:FSP', ''), ('gamma:FSP', ''),
                                  ('p+:FSP', ''), ('K_L0:FSP', '')], writeOut=True, path=path)
            ma.fillParticleList('K_S0:V0 -> pi+ pi-', '', writeOut=True, path=path)
            ma.fillParticleList('Lambda0:V0 -> p+ pi-', '', writeOut=True, path=path)
            ma.fillConvertedPhotonsList('gamma:V0 -> e+ e-', '', writeOut=True, path=path)
 
        if self.config.monitor:
            names = ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'K_S0', 'p+', 'K_L0', 'Lambda0', 'pi0']
            filename = 'Monitor_FSPLoader.root'
            pdgs = {abs(pdg.from_name(name)) for name in names}
            variables = [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs]
            ma.variablesToHistogram('', variables=variables, filename=filename, ignoreCommandLineOverride=True, path=path)
        return path
 
class TrainingData:
    Steers the creation of the training data.
    The training data is used to train a multivariate classifier for each channel.
    The training of the FEI at its core is just generating this training data for each channel.
    After we have created the training data for a stage, we have to train the classifiers (see the Teacher class further down).
 
    def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration,
                 mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
        Create a new TrainingData object
        @param particles list of config.Particle objects
        @param config config.FeiConfiguration object
        @param mc_counts containing number of MC Particles
        self.particles = particles
        self.config = config
        self.mc_counts = mc_counts
    def reconstruct(self) -> pybasf2.Path:
        Returns pybasf2.Path which creates the training data for the given particles
        path = basf2.create_path()

        for particle in self.particles:
            pdgcode = abs(pdg.from_name(particle.name))
            nSignal = self.mc_counts[pdgcode]['sum']

            for channel in particle.channels:
                filename = 'training_input.root'
                nBackground = self.mc_counts[0]['sum'] * channel.preCutConfig.bestCandidateCut
                inverseSamplingRates = {}
                # cap the number of background/signal candidates written out for the training
                if nBackground > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noBackgroundSampling:
                    inverseSamplingRates[0] = int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1
                if nSignal > Teacher.MaximumNumberOfMVASamples:
                    inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1

                spectators = [channel.mvaConfig.target]
                if channel.mvaConfig.sPlotVariable is not None:
                    spectators.append(channel.mvaConfig.sPlotVariable)

                if self.config.monitor:
                    hist_variables = ['mcErrors', 'mcParticleStatus'] + channel.mvaConfig.variables + spectators
                    hist_variables_2d = [(x, channel.mvaConfig.target)
                                         for x in channel.mvaConfig.variables + spectators
                                         if x is not channel.mvaConfig.target]
                    hist_filename = 'Monitor_TrainingData.root'
                    ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
                                            variables_2d=config.variables2binnings_2d(hist_variables_2d),
                                            filename=config.removeJPsiSlash(hist_filename),
                                            ignoreCommandLineOverride=True,
                                            directory=config.removeJPsiSlash(f'{channel.label}'), path=path)

                teacher = basf2.register_module('VariablesToNtuple')
                teacher.set_name('VariablesToNtuple_' + channel.name)
                teacher.param('fileName', filename)
                teacher.param('treeName', f'{channel.label} variables')
                teacher.param('variables', channel.mvaConfig.variables + spectators)
                teacher.param('particleList', channel.name)
                teacher.param('sampling', (channel.mvaConfig.target, inverseSamplingRates))
                teacher.param('ignoreCommandLineOverride', True)
                path.add_module(teacher)
        return path
 
class PreReconstruction:
    Steers the reconstruction phase before the mva method is applied.
    This includes:
        - The ParticleCombination (for each particle and channel we create candidates using
                                   the daughter candidates from the previous stages)
        - Vertex Fitting (this is the slowest part of the whole FEI; KFit is used by default,
                          but you can use fastFit as a drop-in replacement, https://github.com/thomaskeck/FastFit/;
                          this will speed up the whole FEI by a factor 2-3)
 
    def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
        Create a new PreReconstruction object
        @param particles list of config.Particle objects
        @param config config.FeiConfiguration object
        self.particles = particles
        self.config = config
    def reconstruct(self) -> pybasf2.Path:
        Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
        path = basf2.create_path()

        for particle in self.particles:
            for channel in particle.channels:

                if len(channel.daughters) == 1:
                    ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=True, path=path)
                    v2EI = basf2.register_module('VariablesToExtraInfo')
                    v2EI.set_name('VariablesToExtraInfo_' + channel.name)
                    v2EI.param('particleList', channel.name)
                    v2EI.param('variables', {f'constant({channel.decayModeID})': 'decayModeID'})
                    # suppress warnings during the application of this module
                    v2EI.set_log_level(basf2.logging.log_level.ERROR)
                    path.add_module(v2EI)
                else:
                    ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
                                        writeOut=True, path=path)
 
                if self.config.monitor:
                    ma.matchMCTruth(channel.name, path=path)
                    bc_variable = channel.preCutConfig.bestCandidateVariable
                    hist_variables = [bc_variable, 'mcErrors', 'mcParticleStatus', channel.mvaConfig.target]
                    hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
                                         (bc_variable, 'mcErrors'),
                                         (bc_variable, 'mcParticleStatus')]
                    filename = 'Monitor_PreReconstruction_BeforeRanking.root'
                    ma.variablesToHistogram(
                        channel.name,
                        variables=config.variables2binnings(hist_variables),
                        variables_2d=config.variables2binnings_2d(hist_variables_2d),
                        filename=filename,
                        ignoreCommandLineOverride=True,
                        directory=f'{channel.label}',
                        path=path)
 
                if channel.preCutConfig.bestCandidateMode == 'lowest':
                    ma.rankByLowest(channel.name,
                                    channel.preCutConfig.bestCandidateVariable,
                                    channel.preCutConfig.bestCandidateCut,
                                    'preCut_rank',
                                    path=path)
                elif channel.preCutConfig.bestCandidateMode == 'highest':
                    ma.rankByHighest(channel.name,
                                     channel.preCutConfig.bestCandidateVariable,
                                     channel.preCutConfig.bestCandidateCut,
                                     'preCut_rank',
                                     path=path)
                else:
                    raise RuntimeError("Unknown bestCandidateMode " + repr(channel.preCutConfig.bestCandidateMode))

                if self.config.monitor:
                    filename = 'Monitor_PreReconstruction_AfterRanking.root'
                    hist_variables += ['extraInfo(preCut_rank)']
                    hist_variables_2d += [('extraInfo(preCut_rank)', channel.mvaConfig.target),
                                          ('extraInfo(preCut_rank)', 'mcErrors'),
                                          ('extraInfo(preCut_rank)', 'mcParticleStatus')]
                    ma.variablesToHistogram(
                        channel.name,
                        variables=config.variables2binnings(hist_variables),
                        variables_2d=config.variables2binnings_2d(hist_variables_2d),
                        filename=filename,
                        ignoreCommandLineOverride=True,
                        directory=f'{channel.label}',
                        path=path)
 
                elif self.config.training:
                    ma.matchMCTruth(channel.name, path=path)

                if b2bii.isB2BII() and particle.name in ['K_S0', 'Lambda0']:
                    pvfit = basf2.register_module('ParticleVertexFitter')
                    pvfit.set_name('ParticleVertexFitter_' + channel.name)
                    pvfit.param('listName', channel.name)
                    pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
                    pvfit.param('vertexFitter', 'KFit')
                    pvfit.param('fitType', 'vertex')
                    pvfit.set_log_level(basf2.logging.log_level.ERROR)  # suppress warnings
                    path.add_module(pvfit)
                elif re.findall(r"[\w']+", channel.decayString).count('pi0') > 1 and particle.name != 'pi0':
                    basf2.B2INFO(f"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
                elif len(channel.daughters) > 1:
                    pvfit = basf2.register_module('ParticleVertexFitter')
                    pvfit.set_name('ParticleVertexFitter_' + channel.name)
                    pvfit.param('listName', channel.name)
                    pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
                    pvfit.param('vertexFitter', 'KFit')
                    if particle.name in ['pi0']:
                        pvfit.param('fitType', 'mass')
                    else:
                        pvfit.param('fitType', 'vertex')
                    pvfit.set_log_level(basf2.logging.log_level.ERROR)  # suppress warnings
                    path.add_module(pvfit)
 
                if self.config.monitor:
                    hist_variables = ['chiProb', 'mcErrors', 'mcParticleStatus', channel.mvaConfig.target]
                    hist_variables_2d = [('chiProb', channel.mvaConfig.target),
                                         ('chiProb', 'mcErrors'),
                                         ('chiProb', 'mcParticleStatus')]
                    filename = 'Monitor_PreReconstruction_AfterVertex.root'
                    ma.variablesToHistogram(
                        channel.name,
                        variables=config.variables2binnings(hist_variables),
                        variables_2d=config.variables2binnings_2d(hist_variables_2d),
                        filename=filename,
                        ignoreCommandLineOverride=True,
                        directory=f'{channel.label}',
                        path=path)
        return path
 
class PostReconstruction:
    Steers the reconstruction phase after the mva method is applied.
    This includes:
        - The application of the mva method itself.
        - Copying all channel lists into a common one for each particle defined in particles
        - Tagging unique signal candidates, to avoid double counting of channels with overlap
 
    def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
        Create a new PostReconstruction object
        @param particles list of config.Particle objects
        @param config config.FeiConfiguration object
        self.particles = particles
        self.config = config
    def get_missing_channels(self) -> typing.Sequence[str]:
        Returns all channels for which the weightfile is missing
        missing = []
        for particle in self.particles:
            for channel in particle.channels:
                weightfile = channel.label + '.xml'
                if not basf2_mva.available(weightfile):
                    missing += [channel.label]
        return missing

    def available(self) -> bool:
        Check if the relevant information is already available
        return len(self.get_missing_channels()) == 0
 
    def reconstruct(self) -> pybasf2.Path:
        Returns pybasf2.Path which applies the mva method to the particle candidates and merges the channel lists
        path = basf2.create_path()

        for particle in self.particles:
            for channel in particle.channels:
                expert = basf2.register_module('MVAExpert')
                expert.set_name('MVAExpert_' + channel.name)
                if self.config.training:
                    expert.param('identifier', channel.label + '.xml')
                else:
                    expert.param('identifier', self.config.prefix + '_' + channel.label)
                expert.param('extraInfoName', 'SignalProbability')
                expert.param('listNames', [channel.name])
                # suppress warnings during the application of this module
                expert.set_log_level(basf2.logging.log_level.ERROR)
                path.add_module(expert)

                uniqueSignal = basf2.register_module('TagUniqueSignal')
                uniqueSignal.param('particleList', channel.name)
                uniqueSignal.param('target', channel.mvaConfig.target)
                uniqueSignal.param('extraInfoName', 'uniqueSignal')
                uniqueSignal.set_name('TagUniqueSignal_' + channel.name)
                # suppress warnings during the application of this module
                uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
                path.add_module(uniqueSignal)

                if self.config.monitor:
                    hist_variables = ['mcErrors', 'mcParticleStatus', 'extraInfo(uniqueSignal)', 'extraInfo(SignalProbability)',
                                      channel.mvaConfig.target, 'extraInfo(decayModeID)']
                    hist_variables_2d = [('extraInfo(SignalProbability)', channel.mvaConfig.target),
                                         ('extraInfo(SignalProbability)', 'mcErrors'),
                                         ('extraInfo(SignalProbability)', 'mcParticleStatus'),
                                         ('extraInfo(decayModeID)', channel.mvaConfig.target),
                                         ('extraInfo(decayModeID)', 'mcErrors'),
                                         ('extraInfo(decayModeID)', 'extraInfo(uniqueSignal)'),
                                         ('extraInfo(decayModeID)', 'mcParticleStatus')]
                    filename = 'Monitor_PostReconstruction_AfterMVA.root'
                    ma.variablesToHistogram(
                        channel.name,
                        variables=config.variables2binnings(hist_variables),
                        variables_2d=config.variables2binnings_2d(hist_variables_2d),
                        filename=filename,
                        ignoreCommandLineOverride=True,
                        directory=f'{channel.label}',
                        path=path)
 
            cutstring = ''
            if particle.postCutConfig.value > 0.0:
                cutstring = str(particle.postCutConfig.value) + ' < extraInfo(SignalProbability)'

            ma.mergeListsWithBestDuplicate(particle.identifier, [c.name for c in particle.channels],
                                           variable='particleSource', writeOut=True, path=path)
 
            if self.config.monitor:
                hist_variables = ['mcErrors', 'mcParticleStatus', 'extraInfo(uniqueSignal)', 'extraInfo(SignalProbability)',
                                  particle.mvaConfig.target, 'extraInfo(decayModeID)']
                hist_variables_2d = [('extraInfo(decayModeID)', particle.mvaConfig.target),
                                     ('extraInfo(decayModeID)', 'mcErrors'),
                                     ('extraInfo(decayModeID)', 'mcParticleStatus')]
                filename = 'Monitor_PostReconstruction_BeforePostCut.root'
                ma.variablesToHistogram(
                    particle.identifier,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=config.removeJPsiSlash(filename),
                    ignoreCommandLineOverride=True,
                    directory=config.removeJPsiSlash(f'{particle.identifier}'),
                    path=path)

            ma.applyCuts(particle.identifier, cutstring, path=path)

            if self.config.monitor:
                filename = 'Monitor_PostReconstruction_BeforeRanking.root'
                ma.variablesToHistogram(
                    particle.identifier,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=config.removeJPsiSlash(filename),
                    ignoreCommandLineOverride=True,
                    directory=config.removeJPsiSlash(f'{particle.identifier}'),
                    path=path)

            ma.rankByHighest(particle.identifier, 'extraInfo(SignalProbability)',
                             particle.postCutConfig.bestCandidateCut, 'postCut_rank', path=path)
 
            if self.config.monitor:
                hist_variables += ['extraInfo(postCut_rank)']
                hist_variables_2d += [('extraInfo(decayModeID)', 'extraInfo(postCut_rank)'),
                                      (particle.mvaConfig.target, 'extraInfo(postCut_rank)'),
                                      ('mcErrors', 'extraInfo(postCut_rank)'),
                                      ('mcParticleStatus', 'extraInfo(postCut_rank)')]
                filename = 'Monitor_PostReconstruction_AfterRanking.root'
                ma.variablesToHistogram(
                    particle.identifier,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=config.removeJPsiSlash(filename),
                    ignoreCommandLineOverride=True,
                    directory=config.removeJPsiSlash(f'{particle.identifier}'),
                    path=path)

                variables = ['extraInfo(SignalProbability)', 'mcErrors', 'mcParticleStatus', particle.mvaConfig.target,
                             'extraInfo(uniqueSignal)', 'extraInfo(decayModeID)']

                if 'B_s0' == particle.name:
                    variables += ['Mbc']
                elif 'B' in particle.name:
                    variables += ['Mbc', 'cosThetaBetweenParticleAndNominalB']

                filename = 'Monitor_Final.root'
                ma.variablesToNtuple(
                    particle.identifier,
                    variables,
                    treename=config.removeJPsiSlash(f'{particle.identifier} variables'),
                    filename=config.removeJPsiSlash(filename),
                    ignoreCommandLineOverride=True,
                    path=path)
        return path
 
class Teacher:
    Performs all necessary trainings for all training data files which are
    available but for which there is no weight file available yet.
    This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
    The trainings are run in parallel using Python multiprocessing.
    Each training is done by a subprocess call; the training command (passed via config.externTeacher) can be either
      * basf2_mva_teacher, in which case the training is run directly on the local machine
      * externClustTeacher, in which case the training is submitted to the batch system of KEKCC
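
    A hedged sketch of choosing the teacher command via the configuration (assuming
    config.FeiConfiguration accepts keyword arguments; the prefix is a placeholder):

        configuration = config.FeiConfiguration(prefix='FEI_TEST',
                                                externTeacher='externClustTeacher')  # or 'basf2_mva_teacher'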
 
    #: Maximum number of samples used in the MVA training
    MaximumNumberOfMVASamples = int(1e7)
    #: Minimum number of samples required for the MVA training
    MinimumNumberOfMVASamples = int(5e2)

    def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
        Create a new Teacher object
        @param particles list of config.Particle objects
        @param config config.FeiConfiguration object
        self.particles = particles
        self.config = config
    @staticmethod
    def create_fake_weightfile(channel: str):
        Create a fake weight file using the trivial method, it will always return 0.0
        @param channel for which we create a fake weight file
        content = f"""
            <?xml version="1.0" encoding="utf-8"?>
            <method>Trivial</method>
            <weightfile>{channel}.xml</weightfile>
            <treename>tree</treename>
            <target_variable>isSignal</target_variable>
            <weight_variable>__weight__</weight_variable>
            <signal_class>1</signal_class>
            <max_events>0</max_events>
            <number_feature_variables>1</number_feature_variables>
            <variable0>M</variable0>
            <number_spectator_variables>0</number_spectator_variables>
            <number_data_files>1</number_data_files>
            <datafile0>train.root</datafile0>
            <Trivial_version>1</Trivial_version>
            <Trivial_output>0</Trivial_output>
            <signal_fraction>0.066082567</signal_fraction>
            """
        with open(channel + ".xml", "w") as f:
            f.write(content)
 
    @staticmethod
    def check_if_weightfile_is_fake(filename: str):
        Checks if the provided filename is a fake weight file or not
        @param filename the filename of the weight file
        try:
            return '<method>Trivial</method>' in open(filename).readlines()[2]
        except BaseException:
            return True
 
    def upload(self, channel: str):
        Upload the weight file into the condition database
        @param channel whose weight file is uploaded
        disk = channel + '.xml'
        dbase = self.config.prefix + '_' + channel
        basf2_mva.upload(disk, dbase)
        return (disk, dbase)
 
    def do_all_trainings(self):
        Do all trainings for which we find training data
        # Avoid problems with ROOT trying to spawn a GUI thread
        ROOT.PyConfig.StartGuiThread = False
        job_list = []
        filename = 'training_input.root'
        if not os.path.isfile(filename):
            B2WARNING("Training of MVC failed. Couldn't find ROOT file. "
                      "No weight files will be provided.")
        else:
            f = ROOT.TFile.Open(filename, 'read')
            if f.IsZombie():
                B2WARNING("Training of MVC failed. ROOT file corrupt. "
                          "No weight files will be provided.")
            elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
                B2WARNING("Training of MVC failed. ROOT file does not contain any trees. "
                          "No weight files will be provided.")
            else:
                for particle in self.particles:
                    for channel in particle.channels:
                        weightfile = channel.label + '.xml'
                        if not basf2_mva.available(weightfile):
                            keys = [m for m in f.GetListOfKeys() if f"{channel.label}" in m.GetName()]
                            if not keys:
                                continue
                            tree = keys[0].ReadObj()
                            nSig = tree.GetEntries(channel.mvaConfig.target + ' == 1.0')
                            nBg = tree.GetEntries(channel.mvaConfig.target + ' != 1.0')
                            if nSig < Teacher.MinimumNumberOfMVASamples:
                                B2WARNING("Training of MVC failed. "
                                          f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
                                self.create_fake_weightfile(channel.label)
                                self.upload(channel.label)
                                continue
                            if nBg < Teacher.MinimumNumberOfMVASamples:
                                B2WARNING("Training of MVC failed. "
                                          f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
                                self.create_fake_weightfile(channel.label)
                                self.upload(channel.label)
                                continue
                            variable_str = "' '".join(channel.mvaConfig.variables)
                            command = (f"{self.config.externTeacher}"
                                       f" --method '{channel.mvaConfig.method}'"
                                       f" --target_variable '{channel.mvaConfig.target}'"
                                       f" --treename '{channel.label} variables' --datafile 'training_input.root'"
                                       f" --signal_class 1 --variables '{variable_str}'"
                                       f" --identifier '{channel.label}.xml'"
                                       f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1")
                            B2INFO(f"Used following command to invoke teacher: \n {command}")
                            job_list.append((channel.label, command))

        # run all trainings in parallel, each training as its own subprocess
        p = multiprocessing.Pool(None, maxtasksperchild=1)
        func = functools.partial(subprocess.call, shell=True)
        p.map(func, [c for _, c in job_list])
        p.close()
        p.join()

        weightfiles = []
        for name, _ in job_list:
            if not basf2_mva.available(name + '.xml'):
                B2WARNING("Training of MVC failed. For unknown reasons, check the logfile")
                self.create_fake_weightfile(name)
            weightfiles.append(self.upload(name))
        return weightfiles
 
def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    Convert an old FEI training into the new format.
    The old format used hashes for the weight files; the hashes can be converted to the new naming scheme
    using the Summary.pickle file written out by FEIv3. This file must be passed via the parameter configuration.legacy.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    summary = pickle.load(open(configuration.legacy, 'rb'))
    channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}

    teacher = Teacher(particles, configuration)

    for particle in particles:
        for channel in particle.channels:
            new_weightfile = configuration.prefix + '_' + channel.label
            old_weightfile = configuration.prefix + '_' + channel2lists[channel.label.replace('Jpsi', 'J/psi')]
            if not basf2_mva.available(new_weightfile):
                if old_weightfile is None or not basf2_mva.available(old_weightfile):
                    Teacher.create_fake_weightfile(channel.label)
                    teacher.upload(channel.label)
                else:
                    basf2_mva.download(old_weightfile, channel.label + '.xml')
                    teacher.upload(channel.label)
 
def get_stages_from_particles(particles: typing.Sequence[config.Particle]):
    Returns the hierarchical structure of the FEI.
    Each stage depends on the particles in the previous stage.
    The final stage is empty (meaning everything is done, and the training is finished at this point).
    @param particles list of config.Particle objects
    stages = [
        [p for p in particles if p.name in ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0']],
        [p for p in particles if p.name in ['pi0', 'J/psi', 'Lambda0']],
        [p for p in particles if p.name in ['K_S0', 'Sigma+']],
        [p for p in particles if p.name in ['D+', 'D0', 'D_s+', 'Lambda_c+']],
        [p for p in particles if p.name in ['D*+', 'D*0', 'D_s*+']],
        [p for p in particles if p.name in ['B0', 'B+', 'B_s0']],
        []
    ]

    for p in particles:
        if p.name not in [q.name for stage in stages for q in stage]:
            raise RuntimeError(f"Unknown particle {p.name}: Not implemented in FEI")
    return stages
 
def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    Performs the training of the mva classifiers for all available training data.
    This function must either be called by the user after each stage of the FEI during training,
    or (more likely) it is called by the distributed.py script after merging the outputs of all jobs.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @return list of tuples with weight file on disk and identifier in the database for all trained classifiers
    teacher = Teacher(particles, configuration)
    return teacher.do_all_trainings()
 
def save_summary(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration, cache: int):
    Creates the Summary.pickle, which is used to keep track of the stage during the training,
    and can be used later to investigate which configuration was used exactly to create the training.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @param cache current cache level
    configuration = config.FeiConfiguration(configuration.prefix, cache,
                                            configuration.monitor, configuration.legacy, configuration.externTeacher,
                                            configuration.training)
    # Backup the Summary.pickle files of the previous invocations
    for i in [8, 7, 6, 5, 4, 3, 2, 1, 0]:
        if os.path.isfile(f'Summary.pickle.backup_{i}'):
            shutil.copyfile(f'Summary.pickle.backup_{i}', f'Summary.pickle.backup_{i + 1}')
    if os.path.isfile('Summary.pickle'):
        shutil.copyfile('Summary.pickle', 'Summary.pickle.backup_0')
    pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))
 
def get_path(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration) -> FeiState:
    The most important function of the FEI.
    This creates the FEI path for training/fitting (both terms are equal) and application/inference (both terms are equal).
    The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
    and the configuration (see config.py).

    For training this function is called multiple times; each time the FEI reconstructs one more stage in the hierarchical structure,
    i.e. we start with the FSPs, then pi0, K_S0, D, D* and finally the B mesons. You have to set configuration.training to True for training mode.
    All weight files created during the training will be stored in your local database.
    If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
    to the central database first (see the documentation of the Belle II Conditions Database).

    For application you call this function once, and it returns the whole path which will reconstruct B mesons
    with an associated signal probability. You have to set configuration.training to False for application mode.

    You can always turn on the monitoring (configuration.monitor = True)
    to write out ROOT histograms of many quantities for each stage;
    using these histograms you can run the printReporting.py or latexReporting.py scripts to automatically create pdf files.

    This function can also use old FEI trainings (version 3): just pass the Summary.pickle file of the old training,
    and the weight files will be automatically converted to the new naming scheme.

    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
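
    A minimal application-mode sketch (hedged: list and prefix names are placeholders, and
    fei.get_default_channels / keyword construction of FeiConfiguration are assumptions):

        import basf2
        import fei
        path = basf2.create_path()
        # ... add RootInput and everything needed to read the data here ...
        particles = fei.get_default_channels()
        configuration = fei.config.FeiConfiguration(prefix='FEIv4_example', training=False, monitor=False)
        feistate = fei.get_path(particles, configuration)
        path.add_path(feistate.path)
        # the tag-side candidates end up in lists like 'B+:generic' with extraInfo(SignalProbability)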
 
    ____ _  _ _    _       ____ _  _ ____ _  _ ___    _ _  _ ___ ____ ____ ___  ____ ____ ___ ____ ___ _ ____ _  _
    |___ |  | |    |       |___ |  | |___ |\ |  |     | |\ |  |  |___ |__/ |__] |__/ |___  |  |__|  |  | |  | |\ |
    |    |__| |___ |___    |___  \/  |___ | \|  |     | | \|  |  |___ |  \ |    |  \ |___  |  |  |  |  | |__| | \|
    Author: Thomas Keck 2014 - 2017
    Please cite my PhD thesis
    if configuration.cache is None:
        if os.path.isfile('Summary.pickle'):
            print("Cache: Replaced particles and configuration with the ones from Summary.pickle!")
            particles, configuration = pickle.load(open('Summary.pickle', 'rb'))
            cache = configuration.cache
        else:
            if configuration.training:
                cache = -1
            else:
                cache = 0
    else:
        cache = configuration.cache
 
    path = basf2.create_path()

    stages = get_stages_from_particles(particles)

    # If the user provided a Summary.pickle file of an old FEIv3 training, convert the old
    # weight files to the new naming scheme before running as usual.
    if configuration.legacy is not None:
        convert_legacy_training(particles, configuration)
 
    # During training we need the number of MC particles in the whole processed data sample,
    # so that we can sample the candidates written out as training data (see TrainingDataInformation).
    training_data_information = TrainingDataInformation(particles)
    if cache < 0 and configuration.training:
        print("Stage 0: Run over all files to count the number of events and McParticles")
        path.add_path(training_data_information.reconstruct())
        if configuration.training:
            save_summary(particles, configuration, 0)
        return FeiState(path, 0, [])
    elif not configuration.training and configuration.monitor:
        path.add_path(training_data_information.reconstruct())

    # Load the final state particles. It is assumed that the user has already added
    # RootInput, Geometry and everything else required to read the data (see FSPLoader).
    loader = FSPLoader(particles, configuration)
    if cache < 1:
        print("Stage 0: Load FSP particles")
        path.add_path(loader.reconstruct())
 
    # Now we reconstruct each stage one after another
    used_lists = []
    for stage, stage_particles in enumerate(stages):
        pre_reconstruction = PreReconstruction(stage_particles, configuration)
        if cache <= stage:
            print(f"Stage {stage}: PreReconstruct particles: ", [p.name for p in stage_particles])
            path.add_path(pre_reconstruction.reconstruct())

        post_reconstruction = PostReconstruction(stage_particles, configuration)
        if configuration.training and not post_reconstruction.available():
            print(f"Stage {stage}: Create training data for particles: ", [p.name for p in stage_particles])
            mc_counts = training_data_information.get_mc_counts()
            training_data = TrainingData(stage_particles, configuration, mc_counts)
            path.add_path(training_data.reconstruct())
            used_lists += [channel.name for particle in stage_particles for channel in particle.channels]
            break
        if cache <= stage + 1:
            path.add_path(post_reconstruction.reconstruct())
        used_lists += [particle.identifier for particle in stage_particles]
 
    # In monitor mode we are also interested in the ModuleStatistics,
    # which contain the runtime of each module in the path.
    if configuration.monitor:
        output = basf2.register_module('RootOutput')
        output.param('outputFileName', 'Monitor_ModuleStatistics.root')
        output.param('branchNames', ['EventMetaData'])
        output.param('branchNamesPersistent', ['ProcessStatistics'])
        output.param('ignoreCommandLineOverride', True)
        path.add_module(output)

    # The FEI keeps track of the stages which are already reconstructed during the training,
    # so we write out the Summary.pickle here and increase the stage by one.
    if configuration.training or configuration.monitor:
        save_summary(particles, configuration, stage + 1)

    # Finally, return the path, the stage and the used particle lists to the user.
    return FeiState(path, stage + 1, plists=used_lists)